Bläddra i källkod

merge fxt->paje

Andra Hugo 13 år sedan
förälder
incheckning
9ed1d8b7f8

+ 27 - 25
src/common/barrier.c

@@ -17,71 +17,73 @@
 #include <common/barrier.h>
 #include <common/utils.h>
 
-int _starpu_barrier_init(_starpu_barrier_t *barrier, int count)
+int _starpu_barrier_init(struct _starpu_barrier *barrier, int count)
 {
 	barrier->count = count;
 	barrier->reached_start = 0;
 	barrier->reached_exit = 0;
-	PTHREAD_MUTEX_INIT(&barrier->mutex, NULL);
-	PTHREAD_MUTEX_INIT(&barrier->mutex_exit, NULL);
-	PTHREAD_COND_INIT(&barrier->cond, NULL);
+	_STARPU_PTHREAD_MUTEX_INIT(&barrier->mutex, NULL);
+	_STARPU_PTHREAD_MUTEX_INIT(&barrier->mutex_exit, NULL);
+	_STARPU_PTHREAD_COND_INIT(&barrier->cond, NULL);
 	return 0;
 }
 
 static
-int _starpu_barrier_test(_starpu_barrier_t *barrier)
+int _starpu_barrier_test(struct _starpu_barrier *barrier)
 {
-    /*
-     * Check whether any threads are known to be waiting; report
-     * "BUSY" if so.
-     */
-        PTHREAD_MUTEX_LOCK(&barrier->mutex_exit);
-        if (barrier->reached_exit != barrier->count) {
-                PTHREAD_MUTEX_UNLOCK(&barrier->mutex_exit);
+	/*
+	 * Check whether any threads are known to be waiting; report
+	 * "BUSY" if so.
+	 */
+        _STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex_exit);
+        if (barrier->reached_exit != barrier->count)
+	{
+                _STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex_exit);
                 return EBUSY;
         }
-        PTHREAD_MUTEX_UNLOCK(&barrier->mutex_exit);
+        _STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex_exit);
         return 0;
 }
 
-int _starpu_barrier_destroy(_starpu_barrier_t *barrier)
+int _starpu_barrier_destroy(struct _starpu_barrier *barrier)
 {
 	int ret = _starpu_barrier_test(barrier);
-	while (ret == EBUSY) {
+	while (ret == EBUSY)
+	{
 		ret = _starpu_barrier_test(barrier);
 	}
 	_STARPU_DEBUG("reached_exit %d\n", barrier->reached_exit);
 
-	PTHREAD_MUTEX_DESTROY(&barrier->mutex);
-	PTHREAD_MUTEX_DESTROY(&barrier->mutex_exit);
-	PTHREAD_COND_DESTROY(&barrier->cond);
+	_STARPU_PTHREAD_MUTEX_DESTROY(&barrier->mutex);
+	_STARPU_PTHREAD_MUTEX_DESTROY(&barrier->mutex_exit);
+	_STARPU_PTHREAD_COND_DESTROY(&barrier->cond);
 	return 0;
 }
 
-int _starpu_barrier_wait(_starpu_barrier_t *barrier)
+int _starpu_barrier_wait(struct _starpu_barrier *barrier)
 {
 	int ret=0;
 
         // Wait until all threads enter the barrier
-	PTHREAD_MUTEX_LOCK(&barrier->mutex);
+	_STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex);
 	barrier->reached_exit=0;
 	barrier->reached_start++;
 	if (barrier->reached_start == barrier->count)
 	{
 		barrier->reached_start = 0;
-		PTHREAD_COND_BROADCAST(&barrier->cond);
+		_STARPU_PTHREAD_COND_BROADCAST(&barrier->cond);
 		ret = PTHREAD_BARRIER_SERIAL_THREAD;
 	}
 	else
 	{
-                PTHREAD_COND_WAIT(&barrier->cond,&barrier->mutex);
+                _STARPU_PTHREAD_COND_WAIT(&barrier->cond,&barrier->mutex);
 	}
-	PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
+	_STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex);
 
         // Count number of threads that exit the barrier
-	PTHREAD_MUTEX_LOCK(&barrier->mutex_exit);
+	_STARPU_PTHREAD_MUTEX_LOCK(&barrier->mutex_exit);
 	barrier->reached_exit ++;
-	PTHREAD_MUTEX_UNLOCK(&barrier->mutex_exit);
+	_STARPU_PTHREAD_MUTEX_UNLOCK(&barrier->mutex_exit);
 
 	return ret;
 }

+ 7 - 6
src/common/barrier.h

@@ -19,24 +19,25 @@
 
 #include <pthread.h>
 
-typedef struct {
+struct _starpu_barrier
+{
 	int count;
 	int reached_start;
 	int reached_exit;
 	pthread_mutex_t mutex;
 	pthread_mutex_t mutex_exit;
 	pthread_cond_t cond;
-} _starpu_barrier_t;
+};
 
-int _starpu_barrier_init(_starpu_barrier_t *barrier, int count);
+int _starpu_barrier_init(struct _starpu_barrier *barrier, int count);
 
-int _starpu_barrier_destroy(_starpu_barrier_t *barrier);
+int _starpu_barrier_destroy(struct _starpu_barrier *barrier);
 
-int _starpu_barrier_wait(_starpu_barrier_t *barrier);
+int _starpu_barrier_wait(struct _starpu_barrier *barrier);
 
 #if !defined(PTHREAD_BARRIER_SERIAL_THREAD)
 #  define PTHREAD_BARRIER_SERIAL_THREAD -1
-#  define pthread_barrier_t _starpu_barrier_t
+#  define pthread_barrier_t struct _starpu_barrier
 #  define pthread_barrier_init(b,a,c) _starpu_barrier_init(b, c)
 #  define pthread_barrier_destroy(b) _starpu_barrier_destroy(b)
 #  define pthread_barrier_wait(b) _starpu_barrier_wait(b)

+ 28 - 24
src/common/fxt.c

@@ -23,21 +23,22 @@
 
 #ifdef STARPU_USE_FXT
 #include <common/fxt.h>
+#include <starpu_fxt.h>
 
 #ifdef STARPU_HAVE_WINDOWS
 #include <windows.h>
 #endif
-		
-#define PROF_BUFFER_SIZE  (8*1024*1024)
 
-static char PROF_FILE_USER[128];
-static int fxt_started = 0;
+#define _STARPU_PROF_BUFFER_SIZE  (8*1024*1024)
 
-static int written = 0;
+static char _STARPU_PROF_FILE_USER[128];
+static int _starpu_fxt_started = 0;
 
-static int id;
+static int _starpu_written = 0;
 
-static void _profile_set_tracefile(void *last, ...)
+static int _starpu_id;
+
+static void _starpu_profile_set_tracefile(void *last, ...)
 {
 	va_list vl;
 	char *user;
@@ -47,7 +48,7 @@ static void _profile_set_tracefile(void *last, ...)
 			fxt_prefix = "/tmp/";
 
 	va_start(vl, last);
-	vsprintf(PROF_FILE_USER, fxt_prefix, vl);
+	vsprintf(_STARPU_PROF_FILE_USER, fxt_prefix, vl);
 	va_end(vl);
 
 	user = getenv("USER");
@@ -55,31 +56,34 @@ static void _profile_set_tracefile(void *last, ...)
 		user = "";
 
 	char suffix[128];
-	snprintf(suffix, 128, "prof_file_%s_%d", user, id);
+	snprintf(suffix, 128, "prof_file_%s_%d", user, _starpu_id);
 
-	strcat(PROF_FILE_USER, suffix);
+	strcat(_STARPU_PROF_FILE_USER, suffix);
 }
 
-void starpu_set_profiling_id(int new_id) {
+void starpu_set_profiling_id(int new_id)
+{
         _STARPU_DEBUG("Set id to <%d>\n", new_id);
-	id = new_id;
-        _profile_set_tracefile(NULL);
+	_starpu_id = new_id;
+        _starpu_profile_set_tracefile(NULL);
 }
 
 void _starpu_start_fxt_profiling(void)
 {
 	unsigned threadid;
 
-	if (!fxt_started) {
-		fxt_started = 1;
-		_profile_set_tracefile(NULL);
+	if (!_starpu_fxt_started)
+	{
+		_starpu_fxt_started = 1;
+		_starpu_profile_set_tracefile(NULL);
 	}
 
 	threadid = syscall(SYS_gettid);
 
 	atexit(_starpu_stop_fxt_profiling);
 
-	if(fut_setup(PROF_BUFFER_SIZE, FUT_KEYMASKALL, threadid) < 0) {
+	if (fut_setup(_STARPU_PROF_BUFFER_SIZE, FUT_KEYMASKALL, threadid) < 0)
+	{
 		perror("fut_setup");
 		STARPU_ABORT();
 	}
@@ -89,7 +93,7 @@ void _starpu_start_fxt_profiling(void)
 	return;
 }
 
-static void generate_paje_trace(char *input_fxt_filename, char *output_paje_filename)
+static void _starpu_generate_paje_trace(char *input_fxt_filename, char *output_paje_filename)
 {
 	/* We take default options */
 	struct starpu_fxt_options options;
@@ -108,19 +112,19 @@ static void generate_paje_trace(char *input_fxt_filename, char *output_paje_file
 
 void _starpu_stop_fxt_profiling(void)
 {
-	if (!written)
+	if (!_starpu_written)
 	{
 #ifdef STARPU_VERBOSE
 	        char hostname[128];
 		gethostname(hostname, 128);
-		fprintf(stderr, "Writing FxT traces into file %s:%s\n", hostname, PROF_FILE_USER);
+		fprintf(stderr, "Writing FxT traces into file %s:%s\n", hostname, _STARPU_PROF_FILE_USER);
 #endif
-		fut_endup(PROF_FILE_USER);
+		fut_endup(_STARPU_PROF_FILE_USER);
 
 		/* Should we generate a Paje trace directly ? */
 		int generate_trace = starpu_get_env_number("STARPU_GENERATE_TRACE");
 		if (generate_trace == 1)
-			generate_paje_trace(PROF_FILE_USER, "paje.trace");
+			_starpu_generate_paje_trace(_STARPU_PROF_FILE_USER, "paje.trace");
 
 		int ret = fut_done();
 		if (ret < 0)
@@ -130,7 +134,7 @@ void _starpu_stop_fxt_profiling(void)
 			fprintf(stderr, "Warning: the FxT trace could not be generated properly\n");
 		}
 
-		written = 1;
+		_starpu_written = 1;
 	}
 }
 
@@ -144,6 +148,6 @@ void _starpu_fxt_register_thread(unsigned cpuid)
 void starpu_trace_user_event(unsigned long code STARPU_ATTRIBUTE_UNUSED)
 {
 #ifdef STARPU_USE_FXT
-	STARPU_TRACE_USER_EVENT(code);
+	_STARPU_TRACE_USER_EVENT(code);
 #endif
 }

+ 189 - 179
src/common/fxt.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2009-2011  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -32,72 +32,75 @@
 #include <starpu.h>
 
 /* some key to identify the worker kind */
-#define STARPU_FUT_APPS_KEY	0x100
-#define STARPU_FUT_CPU_KEY	0x101
-#define STARPU_FUT_CUDA_KEY	0x102
-#define STARPU_FUT_OPENCL_KEY	0x103
+#define _STARPU_FUT_APPS_KEY	0x100
+#define _STARPU_FUT_CPU_KEY	0x101
+#define _STARPU_FUT_CUDA_KEY	0x102
+#define _STARPU_FUT_OPENCL_KEY	0x103
 
-#define STARPU_FUT_WORKER_INIT_START	0x5100
-#define STARPU_FUT_WORKER_INIT_END	0x5101
+#define _STARPU_FUT_WORKER_INIT_START	0x5100
+#define _STARPU_FUT_WORKER_INIT_END	0x5101
 
-#define	STARPU_FUT_START_CODELET_BODY	0x5102
-#define	STARPU_FUT_END_CODELET_BODY	0x5103
+#define	_STARPU_FUT_START_CODELET_BODY	0x5102
+#define	_STARPU_FUT_END_CODELET_BODY	0x5103
 
-#define STARPU_FUT_JOB_PUSH		0x5104
-#define STARPU_FUT_JOB_POP		0x5105
+#define _STARPU_FUT_JOB_PUSH		0x5104
+#define _STARPU_FUT_JOB_POP		0x5105
 
-#define STARPU_FUT_UPDATE_TASK_CNT	0x5106
+#define _STARPU_FUT_UPDATE_TASK_CNT	0x5106
 
-#define STARPU_FUT_START_FETCH_INPUT	0x5107
-#define STARPU_FUT_END_FETCH_INPUT	0x5108
-#define STARPU_FUT_START_PUSH_OUTPUT	0x5109
-#define STARPU_FUT_END_PUSH_OUTPUT	0x5110
+#define _STARPU_FUT_START_FETCH_INPUT	0x5107
+#define _STARPU_FUT_END_FETCH_INPUT	0x5108
+#define _STARPU_FUT_START_PUSH_OUTPUT	0x5109
+#define _STARPU_FUT_END_PUSH_OUTPUT	0x5110
 
-#define STARPU_FUT_TAG		0x5111
-#define STARPU_FUT_TAG_DEPS	0x5112
+#define _STARPU_FUT_TAG		0x5111
+#define _STARPU_FUT_TAG_DEPS	0x5112
 
-#define STARPU_FUT_TASK_DEPS		0x5113
+#define _STARPU_FUT_TASK_DEPS		0x5113
 
-#define STARPU_FUT_DATA_COPY		0x5114
-#define STARPU_FUT_WORK_STEALING	0x5115
+#define _STARPU_FUT_DATA_COPY		0x5114
+#define _STARPU_FUT_WORK_STEALING	0x5115
 
-#define STARPU_FUT_WORKER_DEINIT_START	0x5116
-#define STARPU_FUT_WORKER_DEINIT_END	0x5117
+#define _STARPU_FUT_WORKER_DEINIT_START	0x5116
+#define _STARPU_FUT_WORKER_DEINIT_END	0x5117
 
-#define STARPU_FUT_WORKER_SLEEP_START	0x5118
-#define STARPU_FUT_WORKER_SLEEP_END	0x5119
+#define _STARPU_FUT_WORKER_SLEEP_START	0x5118
+#define _STARPU_FUT_WORKER_SLEEP_END	0x5119
 
-#define STARPU_FUT_USER_DEFINED_START	0x5120
-#define STARPU_FUT_USER_DEFINED_END	0x5121
+#define _STARPU_FUT_USER_DEFINED_START	0x5120
+#define _STARPU_FUT_USER_DEFINED_END	0x5121
 
-#define	STARPU_FUT_NEW_MEM_NODE		0x5122
+#define	_STARPU_FUT_NEW_MEM_NODE		0x5122
 
-#define	STARPU_FUT_START_CALLBACK	0x5123
-#define	STARPU_FUT_END_CALLBACK		0x5124
+#define	_STARPU_FUT_START_CALLBACK	0x5123
+#define	_STARPU_FUT_END_CALLBACK		0x5124
 
-#define	STARPU_FUT_TASK_DONE		0x5125
-#define	STARPU_FUT_TAG_DONE		0x5126
+#define	_STARPU_FUT_TASK_DONE		0x5125
+#define	_STARPU_FUT_TAG_DONE		0x5126
 
-#define	STARPU_FUT_START_ALLOC		0x5127
-#define	STARPU_FUT_END_ALLOC		0x5128
+#define	_STARPU_FUT_START_ALLOC		0x5127
+#define	_STARPU_FUT_END_ALLOC		0x5128
 
-#define	STARPU_FUT_START_ALLOC_REUSE	0x5129
-#define	STARPU_FUT_END_ALLOC_REUSE	0x5130
+#define	_STARPU_FUT_START_ALLOC_REUSE	0x5129
+#define	_STARPU_FUT_END_ALLOC_REUSE	0x5130
 
-#define	STARPU_FUT_START_MEMRECLAIM	0x5131
-#define	STARPU_FUT_END_MEMRECLAIM	0x5132
+#define	_STARPU_FUT_START_MEMRECLAIM	0x5131
+#define	_STARPU_FUT_END_MEMRECLAIM	0x5132
 
-#define	STARPU_FUT_START_DRIVER_COPY	0x5133
-#define	STARPU_FUT_END_DRIVER_COPY	0x5134
+#define	_STARPU_FUT_START_DRIVER_COPY	0x5133
+#define	_STARPU_FUT_END_DRIVER_COPY	0x5134
 
-#define	STARPU_FUT_START_PROGRESS	0x5135
-#define	STARPU_FUT_END_PROGRESS		0x5136
+#define	_STARPU_FUT_START_DRIVER_COPY_ASYNC	0x5135
+#define	_STARPU_FUT_END_DRIVER_COPY_ASYNC	0x5136
 
-#define STARPU_FUT_USER_EVENT		0x5137
+#define	_STARPU_FUT_START_PROGRESS	0x5137
+#define	_STARPU_FUT_END_PROGRESS		0x5138
 
-#define STARPU_FUT_SET_PROFILING	0x5138
+#define _STARPU_FUT_USER_EVENT		0x5139
 
-#define STARPU_FUT_TASK_WAIT_FOR_ALL	0x5139
+#define _STARPU_FUT_SET_PROFILING	0x513a
+
+#define _STARPU_FUT_TASK_WAIT_FOR_ALL	0x513b
 
 #ifdef STARPU_USE_FXT
 #include <sys/syscall.h> /* pour les définitions de SYS_xxx */
@@ -117,7 +120,7 @@ void _starpu_fxt_register_thread(unsigned);
 /* Sometimes we need something a little more specific than the wrappers from
  * FxT: these macros make it possible to add an event with 3, 4 or 5 numbers
  * followed by a string. */
-#define STARPU_FUT_DO_PROBE3STR(CODE, P1, P2, P3, str)			\
+#define _STARPU_FUT_DO_PROBE3STR(CODE, P1, P2, P3, str)			\
 do {									\
 	/* No more than FXT_MAX_PARAMS args are allowed */		\
 	/* we add a \0 just in case ... */				\
@@ -134,7 +137,7 @@ do {									\
 	((char *)futargs)[len - 1] = '\0';				\
 } while (0);
 
-#define STARPU_FUT_DO_PROBE4STR(CODE, P1, P2, P3, P4, str)		\
+#define _STARPU_FUT_DO_PROBE4STR(CODE, P1, P2, P3, P4, str)		\
 do {									\
 	/* No more than FXT_MAX_PARAMS args are allowed */		\
 	/* we add a \0 just in case ... */				\
@@ -152,7 +155,7 @@ do {									\
 	((char *)futargs)[len - 1] = '\0';				\
 } while (0);
 
-#define STARPU_FUT_DO_PROBE5STR(CODE, P1, P2, P3, P4, P5, str)		\
+#define _STARPU_FUT_DO_PROBE5STR(CODE, P1, P2, P3, P4, P5, str)		\
 do {									\
 	/* No more than FXT_MAX_PARAMS args are allowed */		\
 	/* we add a \0 just in case ... */				\
@@ -173,213 +176,220 @@ do {									\
 
 
 
-/* workerkind = STARPU_FUT_CPU_KEY for instance */
-#define STARPU_TRACE_NEW_MEM_NODE(nodeid)			\
-	FUT_DO_PROBE2(STARPU_FUT_NEW_MEM_NODE, nodeid, syscall(SYS_gettid));
+/* workerkind = _STARPU_FUT_CPU_KEY for instance */
+#define _STARPU_TRACE_NEW_MEM_NODE(nodeid)			\
+	FUT_DO_PROBE2(_STARPU_FUT_NEW_MEM_NODE, nodeid, syscall(SYS_gettid));
 
-#define STARPU_TRACE_WORKER_INIT_START(workerkind, devid, memnode)	\
-	FUT_DO_PROBE4(STARPU_FUT_WORKER_INIT_START, workerkind, devid, memnode, syscall(SYS_gettid));
+#define _STARPU_TRACE_WORKER_INIT_START(workerkind, devid, memnode)	\
+	FUT_DO_PROBE4(_STARPU_FUT_WORKER_INIT_START, workerkind, devid, memnode, syscall(SYS_gettid));
 
-#define STARPU_TRACE_WORKER_INIT_END				\
-	FUT_DO_PROBE1(STARPU_FUT_WORKER_INIT_END, syscall(SYS_gettid));
+#define _STARPU_TRACE_WORKER_INIT_END				\
+	FUT_DO_PROBE1(_STARPU_FUT_WORKER_INIT_END, syscall(SYS_gettid));
 
-#define STARPU_TRACE_START_CODELET_BODY(job)				\
+#define _STARPU_TRACE_START_CODELET_BODY(job)				\
 do {									\
         const char *model_name = _starpu_get_model_name((job));         \
 	if (model_name)                                                 \
 	{								\
 		/* we include the symbol name */			\
-		STARPU_FUT_DO_PROBE4STR(STARPU_FUT_START_CODELET_BODY, (job), ((job)->task)->sched_ctx, syscall(SYS_gettid), 1, model_name); \
+		_STARPU_FUT_DO_PROBE4STR(_STARPU_FUT_START_CODELET_BODY, (job), ((job)->task)->sched_ctx, syscall(SYS_gettid), 1, model_name); \
 	}								\
 	else {                                                          \
-		FUT_DO_PROBE4(STARPU_FUT_START_CODELET_BODY, (job), ((job)->task)->sched_ctx, syscall(SYS_gettid), 0); \
+		FUT_DO_PROBE4(_STARPU_FUT_START_CODELET_BODY, (job), ((job)->task)->sched_ctx, syscall(SYS_gettid), 0); \
 	}								\
 } while(0);
 
-#define STARPU_TRACE_END_CODELET_BODY(job, archtype)			\
+#define _STARPU_TRACE_END_CODELET_BODY(job, nimpl, archtype)			\
 do {									\
-	const size_t job_size = _starpu_job_get_data_size((job));	\
-	const uint32_t job_hash = _starpu_compute_buffers_footprint(job);\
-	FUT_DO_PROBE5(STARPU_FUT_END_CODELET_BODY, job, (job_size), (job_hash), (archtype), syscall(SYS_gettid));	\
+	const size_t job_size = _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));	\
+	const uint32_t job_hash = _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, archtype, nimpl, (job));\
+	FUT_DO_PROBE5(_STARPU_FUT_END_CODELET_BODY, (job), (job_size), (job_hash), (archtype), syscall(SYS_gettid));	\
 } while(0);
 
-#define STARPU_TRACE_START_CALLBACK(job)	\
-	FUT_DO_PROBE2(STARPU_FUT_START_CALLBACK, job, syscall(SYS_gettid));
+#define _STARPU_TRACE_START_CALLBACK(job)	\
+	FUT_DO_PROBE2(_STARPU_FUT_START_CALLBACK, job, syscall(SYS_gettid));
 
-#define STARPU_TRACE_END_CALLBACK(job)	\
-	FUT_DO_PROBE2(STARPU_FUT_END_CALLBACK, job, syscall(SYS_gettid));
+#define _STARPU_TRACE_END_CALLBACK(job)	\
+	FUT_DO_PROBE2(_STARPU_FUT_END_CALLBACK, job, syscall(SYS_gettid));
 
-#define STARPU_TRACE_JOB_PUSH(task, prio)	\
-	FUT_DO_PROBE3(STARPU_FUT_JOB_PUSH, task, prio, syscall(SYS_gettid));
+#define _STARPU_TRACE_JOB_PUSH(task, prio)	\
+	FUT_DO_PROBE3(_STARPU_FUT_JOB_PUSH, task, prio, syscall(SYS_gettid));
 
-#define STARPU_TRACE_JOB_POP(task, prio)	\
-	FUT_DO_PROBE3(STARPU_FUT_JOB_POP, task, prio, syscall(SYS_gettid));
+#define _STARPU_TRACE_JOB_POP(task, prio)	\
+	FUT_DO_PROBE3(_STARPU_FUT_JOB_POP, task, prio, syscall(SYS_gettid));
 
-#define STARPU_TRACE_UPDATE_TASK_CNT(counter)	\
-	FUT_DO_PROBE2(STARPU_FUT_UPDATE_TASK_CNT, counter, syscall(SYS_gettid))
+#define _STARPU_TRACE_UPDATE_TASK_CNT(counter)	\
+	FUT_DO_PROBE2(_STARPU_FUT_UPDATE_TASK_CNT, counter, syscall(SYS_gettid))
 
-#define STARPU_TRACE_START_FETCH_INPUT(job)	\
-	FUT_DO_PROBE2(STARPU_FUT_START_FETCH_INPUT, job, syscall(SYS_gettid));
+#define _STARPU_TRACE_START_FETCH_INPUT(job)	\
+	FUT_DO_PROBE2(_STARPU_FUT_START_FETCH_INPUT, job, syscall(SYS_gettid));
 
-#define STARPU_TRACE_END_FETCH_INPUT(job)	\
-	FUT_DO_PROBE2(STARPU_FUT_END_FETCH_INPUT, job, syscall(SYS_gettid));
+#define _STARPU_TRACE_END_FETCH_INPUT(job)	\
+	FUT_DO_PROBE2(_STARPU_FUT_END_FETCH_INPUT, job, syscall(SYS_gettid));
 
-#define STARPU_TRACE_START_PUSH_OUTPUT(job)	\
-	FUT_DO_PROBE2(STARPU_FUT_START_PUSH_OUTPUT, job, syscall(SYS_gettid));
+#define _STARPU_TRACE_START_PUSH_OUTPUT(job)	\
+	FUT_DO_PROBE2(_STARPU_FUT_START_PUSH_OUTPUT, job, syscall(SYS_gettid));
 
-#define STARPU_TRACE_END_PUSH_OUTPUT(job)	\
-	FUT_DO_PROBE2(STARPU_FUT_END_PUSH_OUTPUT, job, syscall(SYS_gettid));
+#define _STARPU_TRACE_END_PUSH_OUTPUT(job)	\
+	FUT_DO_PROBE2(_STARPU_FUT_END_PUSH_OUTPUT, job, syscall(SYS_gettid));
 
-#define STARPU_TRACE_TAG(tag, job)	\
-	FUT_DO_PROBE2(STARPU_FUT_TAG, tag, (job)->job_id)
+#define _STARPU_TRACE_TAG(tag, job)	\
+	FUT_DO_PROBE2(_STARPU_FUT_TAG, tag, (job)->job_id)
 
-#define STARPU_TRACE_TAG_DEPS(tag_child, tag_father)	\
-	FUT_DO_PROBE2(STARPU_FUT_TAG_DEPS, tag_child, tag_father)
+#define _STARPU_TRACE_TAG_DEPS(tag_child, tag_father)	\
+	FUT_DO_PROBE2(_STARPU_FUT_TAG_DEPS, tag_child, tag_father)
 
-#define STARPU_TRACE_TASK_DEPS(job_prev, job_succ)	\
-	FUT_DO_PROBE2(STARPU_FUT_TASK_DEPS, (job_prev)->job_id, (job_succ)->job_id)
+#define _STARPU_TRACE_TASK_DEPS(job_prev, job_succ)	\
+	FUT_DO_PROBE2(_STARPU_FUT_TASK_DEPS, (job_prev)->job_id, (job_succ)->job_id)
 
-#define STARPU_TRACE_GHOST_TASK_DEPS(ghost_prev_id, job_succ_id)		\
-	FUT_DO_PROBE2(STARPU_FUT_TASK_DEPS, (ghost_prev_id), (job_succ_id))
+#define _STARPU_TRACE_GHOST_TASK_DEPS(ghost_prev_id, job_succ_id)		\
+	FUT_DO_PROBE2(_STARPU_FUT_TASK_DEPS, (ghost_prev_id), (job_succ_id))
 
-#define STARPU_TRACE_TASK_DONE(job)						\
+#define _STARPU_TRACE_TASK_DONE(job)						\
 do {										\
 	unsigned exclude_from_dag = (job)->exclude_from_dag;			\
         const char *model_name = _starpu_get_model_name((job));                       \
 	if (model_name)					                        \
 	{									\
-		STARPU_FUT_DO_PROBE4STR(STARPU_FUT_TASK_DONE, (job)->job_id, syscall(SYS_gettid), (long unsigned)exclude_from_dag, 1, model_name); \
+		_STARPU_FUT_DO_PROBE4STR(_STARPU_FUT_TASK_DONE, (job)->job_id, syscall(SYS_gettid), (long unsigned)exclude_from_dag, 1, model_name);\
 	}									\
 	else {									\
-		FUT_DO_PROBE4(STARPU_FUT_TASK_DONE, (job)->job_id, syscall(SYS_gettid), (long unsigned)exclude_from_dag, 0); \
+		FUT_DO_PROBE4(_STARPU_FUT_TASK_DONE, (job)->job_id, syscall(SYS_gettid), (long unsigned)exclude_from_dag, 0);\
 	}									\
 } while(0);
 
-#define STARPU_TRACE_TAG_DONE(tag)						\
+#define _STARPU_TRACE_TAG_DONE(tag)						\
 do {										\
-        struct starpu_job_s *job = (tag)->job;                                  \
+        struct _starpu_job *job = (tag)->job;                                  \
         const char *model_name = _starpu_get_model_name((job));                       \
 	if (model_name)                                                         \
 	{									\
-          STARPU_FUT_DO_PROBE3STR(STARPU_FUT_TAG_DONE, (tag)->id, syscall(SYS_gettid), 1, model_name); \
+          _STARPU_FUT_DO_PROBE3STR(_STARPU_FUT_TAG_DONE, (tag)->id, syscall(SYS_gettid), 1, model_name); \
 	}									\
 	else {									\
-		FUT_DO_PROBE3(STARPU_FUT_TAG_DONE, (tag)->id, syscall(SYS_gettid), 0);\
+		FUT_DO_PROBE3(_STARPU_FUT_TAG_DONE, (tag)->id, syscall(SYS_gettid), 0);\
 	}									\
 } while(0);
 
-#define STARPU_TRACE_DATA_COPY(src_node, dst_node, size)	\
-	FUT_DO_PROBE3(STARPU_FUT_DATA_COPY, src_node, dst_node, size)
+#define _STARPU_TRACE_DATA_COPY(src_node, dst_node, size)	\
+	FUT_DO_PROBE3(_STARPU_FUT_DATA_COPY, src_node, dst_node, size)
+
+#define _STARPU_TRACE_START_DRIVER_COPY(src_node, dst_node, size, com_id)	\
+	FUT_DO_PROBE4(_STARPU_FUT_START_DRIVER_COPY, src_node, dst_node, size, com_id)
+
+#define _STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, com_id)	\
+	FUT_DO_PROBE4(_STARPU_FUT_END_DRIVER_COPY, src_node, dst_node, size, com_id)
 
-#define STARPU_TRACE_START_DRIVER_COPY(src_node, dst_node, size, com_id)	\
-	FUT_DO_PROBE4(STARPU_FUT_START_DRIVER_COPY, src_node, dst_node, size, com_id)
+#define _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node)	\
+	FUT_DO_PROBE2(_STARPU_FUT_START_DRIVER_COPY_ASYNC, src_node, dst_node)
 
-#define STARPU_TRACE_END_DRIVER_COPY(src_node, dst_node, size, com_id)	\
-	FUT_DO_PROBE4(STARPU_FUT_END_DRIVER_COPY, src_node, dst_node, size, com_id)
+/* Emits the end event of an asynchronous driver copy. It takes the same two
+ * arguments as _STARPU_TRACE_START_DRIVER_COPY_ASYNC and must use the _ASYNC
+ * event code, not _STARPU_FUT_END_DRIVER_COPY (which is emitted with four
+ * arguments by _STARPU_TRACE_END_DRIVER_COPY). */
+#define _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node)	\
+	FUT_DO_PROBE2(_STARPU_FUT_END_DRIVER_COPY_ASYNC, src_node, dst_node)
 
-#define STARPU_TRACE_WORK_STEALING(empty_q, victim_q)		\
-	FUT_DO_PROBE2(STARPU_FUT_WORK_STEALING, empty_q, victim_q)
+#define _STARPU_TRACE_WORK_STEALING(empty_q, victim_q)		\
+	FUT_DO_PROBE2(_STARPU_FUT_WORK_STEALING, empty_q, victim_q)
 
-#define STARPU_TRACE_WORKER_DEINIT_START			\
-	FUT_DO_PROBE1(STARPU_FUT_WORKER_DEINIT_START, syscall(SYS_gettid));
+#define _STARPU_TRACE_WORKER_DEINIT_START			\
+	FUT_DO_PROBE1(_STARPU_FUT_WORKER_DEINIT_START, syscall(SYS_gettid));
 
-#define STARPU_TRACE_WORKER_DEINIT_END(workerkind)		\
-	FUT_DO_PROBE2(STARPU_FUT_WORKER_DEINIT_END, workerkind, syscall(SYS_gettid));
+#define _STARPU_TRACE_WORKER_DEINIT_END(workerkind)		\
+	FUT_DO_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, workerkind, syscall(SYS_gettid));
 
-#define STARPU_TRACE_WORKER_SLEEP_START	\
-	FUT_DO_PROBE1(STARPU_FUT_WORKER_SLEEP_START, syscall(SYS_gettid));
+#define _STARPU_TRACE_WORKER_SLEEP_START	\
+	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SLEEP_START, syscall(SYS_gettid));
 
-#define STARPU_TRACE_WORKER_SLEEP_END	\
-	FUT_DO_PROBE1(STARPU_FUT_WORKER_SLEEP_END, syscall(SYS_gettid));
+#define _STARPU_TRACE_WORKER_SLEEP_END	\
+	FUT_DO_PROBE1(_STARPU_FUT_WORKER_SLEEP_END, syscall(SYS_gettid));
 
-#define STARPU_TRACE_USER_DEFINED_START	\
-	FUT_DO_PROBE1(STARPU_FUT_USER_DEFINED_START, syscall(SYS_gettid));
+#define _STARPU_TRACE_USER_DEFINED_START	\
+	FUT_DO_PROBE1(_STARPU_FUT_USER_DEFINED_START, syscall(SYS_gettid));
 
-#define STARPU_TRACE_USER_DEFINED_END		\
-	FUT_DO_PROBE1(STARPU_FUT_USER_DEFINED_END, syscall(SYS_gettid));
+#define _STARPU_TRACE_USER_DEFINED_END		\
+	FUT_DO_PROBE1(_STARPU_FUT_USER_DEFINED_END, syscall(SYS_gettid));
 
-#define STARPU_TRACE_START_ALLOC(memnode)		\
-	FUT_DO_PROBE2(STARPU_FUT_START_ALLOC, memnode, syscall(SYS_gettid));
+#define _STARPU_TRACE_START_ALLOC(memnode)		\
+	FUT_DO_PROBE2(_STARPU_FUT_START_ALLOC, memnode, syscall(SYS_gettid));
 	
-#define STARPU_TRACE_END_ALLOC(memnode)		\
-	FUT_DO_PROBE2(STARPU_FUT_END_ALLOC, memnode, syscall(SYS_gettid));
+#define _STARPU_TRACE_END_ALLOC(memnode)		\
+	FUT_DO_PROBE2(_STARPU_FUT_END_ALLOC, memnode, syscall(SYS_gettid));
 
-#define STARPU_TRACE_START_ALLOC_REUSE(memnode)		\
-	FUT_DO_PROBE2(STARPU_FUT_START_ALLOC_REUSE, memnode, syscall(SYS_gettid));
+#define _STARPU_TRACE_START_ALLOC_REUSE(memnode)		\
+	FUT_DO_PROBE2(_STARPU_FUT_START_ALLOC_REUSE, memnode, syscall(SYS_gettid));
 	
-#define STARPU_TRACE_END_ALLOC_REUSE(memnode)		\
-	FUT_DO_PROBE2(STARPU_FUT_END_ALLOC_REUSE, memnode, syscall(SYS_gettid));
+#define _STARPU_TRACE_END_ALLOC_REUSE(memnode)		\
+	FUT_DO_PROBE2(_STARPU_FUT_END_ALLOC_REUSE, memnode, syscall(SYS_gettid));
 	
-#define STARPU_TRACE_START_MEMRECLAIM(memnode)		\
-	FUT_DO_PROBE2(STARPU_FUT_START_MEMRECLAIM, memnode, syscall(SYS_gettid));
+#define _STARPU_TRACE_START_MEMRECLAIM(memnode)		\
+	FUT_DO_PROBE2(_STARPU_FUT_START_MEMRECLAIM, memnode, syscall(SYS_gettid));
 	
-#define STARPU_TRACE_END_MEMRECLAIM(memnode)		\
-	FUT_DO_PROBE2(STARPU_FUT_END_MEMRECLAIM, memnode, syscall(SYS_gettid));
+#define _STARPU_TRACE_END_MEMRECLAIM(memnode)		\
+	FUT_DO_PROBE2(_STARPU_FUT_END_MEMRECLAIM, memnode, syscall(SYS_gettid));
 	
 /* We skip these events because they are called so often that they cause FxT to
  * fail and make the overall trace unreadable anyway. */
-#define STARPU_TRACE_START_PROGRESS(memnode)		\
+#define _STARPU_TRACE_START_PROGRESS(memnode)		\
 	do {} while (0);
-//	FUT_DO_PROBE2(STARPU_FUT_START_PROGRESS, memnode, syscall(SYS_gettid));
+//	FUT_DO_PROBE2(_STARPU_FUT_START_PROGRESS, memnode, syscall(SYS_gettid));
 
-#define STARPU_TRACE_END_PROGRESS(memnode)		\
+#define _STARPU_TRACE_END_PROGRESS(memnode)		\
 	do {} while (0);
-	//FUT_DO_PROBE2(STARPU_FUT_END_PROGRESS, memnode, syscall(SYS_gettid));
+	//FUT_DO_PROBE2(_STARPU_FUT_END_PROGRESS, memnode, syscall(SYS_gettid));
 	
-#define STARPU_TRACE_USER_EVENT(code)			\
-	FUT_DO_PROBE2(STARPU_FUT_USER_EVENT, code, syscall(SYS_gettid));
+#define _STARPU_TRACE_USER_EVENT(code)			\
+	FUT_DO_PROBE2(_STARPU_FUT_USER_EVENT, code, syscall(SYS_gettid));
 
-#define STARPU_TRACE_SET_PROFILING(status)		\
-	FUT_DO_PROBE2(STARPU_FUT_SET_PROFILING, status, syscall(SYS_gettid));
+#define _STARPU_TRACE_SET_PROFILING(status)		\
+	FUT_DO_PROBE2(_STARPU_FUT_SET_PROFILING, status, syscall(SYS_gettid));
 
-#define STARPU_TRACE_TASK_WAIT_FOR_ALL			\
-	FUT_DO_PROBE0(STARPU_FUT_TASK_WAIT_FOR_ALL)
+#define _STARPU_TRACE_TASK_WAIT_FOR_ALL			\
+	FUT_DO_PROBE0(_STARPU_FUT_TASK_WAIT_FOR_ALL)
 
 #else // !STARPU_USE_FXT
 
 /* Dummy macros in case FxT is disabled */
-#define STARPU_TRACE_NEW_MEM_NODE(nodeid)	do {} while(0);
-#define TRACE_NEW_WORKER(a,b)			do {} while(0);
-#define STARPU_TRACE_WORKER_INIT_START(a,b,c)	do {} while(0);
-#define STARPU_TRACE_WORKER_INIT_END		do {} while(0);
-#define STARPU_TRACE_START_CODELET_BODY(job)	do {} while(0);
-#define STARPU_TRACE_END_CODELET_BODY(job, a)	do {} while(0);
-#define STARPU_TRACE_START_CALLBACK(job)	do {} while(0);
-#define STARPU_TRACE_END_CALLBACK(job)		do {} while(0);
-#define STARPU_TRACE_JOB_PUSH(task, prio)	do {} while(0);
-#define STARPU_TRACE_JOB_POP(task, prio)	do {} while(0);
-#define STARPU_TRACE_UPDATE_TASK_CNT(counter)	do {} while(0);
-#define STARPU_TRACE_START_FETCH_INPUT(job)	do {} while(0);
-#define STARPU_TRACE_END_FETCH_INPUT(job)	do {} while(0);
-#define STARPU_TRACE_START_PUSH_OUTPUT(job)	do {} while(0);
-#define STARPU_TRACE_END_PUSH_OUTPUT(job)	do {} while(0);
-#define STARPU_TRACE_TAG(tag, job)	do {} while(0);
-#define STARPU_TRACE_TAG_DEPS(a, b)	do {} while(0);
-#define STARPU_TRACE_TASK_DEPS(a, b)		do {} while(0);
-#define STARPU_TRACE_GHOST_TASK_DEPS(a, b)	do {} while(0);
-#define STARPU_TRACE_TASK_DONE(a)		do {} while(0);
-#define STARPU_TRACE_TAG_DONE(a)		do {} while(0);
-#define STARPU_TRACE_DATA_COPY(a, b, c)		do {} while(0);
-#define STARPU_TRACE_START_DRIVER_COPY(a,b,c,d)	do {} while(0);
-#define STARPU_TRACE_END_DRIVER_COPY(a,b,c,d)	do {} while(0);
-#define STARPU_TRACE_WORK_STEALING(a, b)	do {} while(0);
-#define STARPU_TRACE_WORKER_DEINIT_START	do {} while(0);
-#define STARPU_TRACE_WORKER_DEINIT_END(a)	do {} while(0);
-#define STARPU_TRACE_WORKER_SLEEP_START		do {} while(0);
-#define STARPU_TRACE_WORKER_SLEEP_END		do {} while(0);
-#define STARPU_TRACE_USER_DEFINED_START		do {} while(0);
-#define STARPU_TRACE_USER_DEFINED_END		do {} while(0);
-#define STARPU_TRACE_START_ALLOC(memnode)	do {} while(0);
-#define STARPU_TRACE_END_ALLOC(memnode)		do {} while(0);
-#define STARPU_TRACE_START_ALLOC_REUSE(a)	do {} while(0);
-#define STARPU_TRACE_END_ALLOC_REUSE(a)		do {} while(0);
-#define STARPU_TRACE_START_MEMRECLAIM(memnode)	do {} while(0);
-#define STARPU_TRACE_END_MEMRECLAIM(memnode)	do {} while(0);
-#define STARPU_TRACE_START_PROGRESS(memnode)	do {} while(0);
-#define STARPU_TRACE_END_PROGRESS(memnode)	do {} while(0);
-#define STARPU_TRACE_USER_EVENT(code)		do {} while(0);
-#define STARPU_TRACE_SET_PROFILING(status)	do {} while(0);
-#define STARPU_TRACE_TASK_WAIT_FOR_ALL		do {} while(0);
+#define _STARPU_TRACE_NEW_MEM_NODE(nodeid)	do {} while(0);
+#define _STARPU_TRACE_WORKER_INIT_START(a,b,c)	do {} while(0);
+#define _STARPU_TRACE_WORKER_INIT_END		do {} while(0);
+#define _STARPU_TRACE_START_CODELET_BODY(job)	do {} while(0);
+#define _STARPU_TRACE_END_CODELET_BODY(job, nimpl, a)	do {} while(0);
+#define _STARPU_TRACE_START_CALLBACK(job)	do {} while(0);
+#define _STARPU_TRACE_END_CALLBACK(job)		do {} while(0);
+#define _STARPU_TRACE_JOB_PUSH(task, prio)	do {} while(0);
+#define _STARPU_TRACE_JOB_POP(task, prio)	do {} while(0);
+#define _STARPU_TRACE_UPDATE_TASK_CNT(counter)	do {} while(0);
+#define _STARPU_TRACE_START_FETCH_INPUT(job)	do {} while(0);
+#define _STARPU_TRACE_END_FETCH_INPUT(job)	do {} while(0);
+#define _STARPU_TRACE_START_PUSH_OUTPUT(job)	do {} while(0);
+#define _STARPU_TRACE_END_PUSH_OUTPUT(job)	do {} while(0);
+#define _STARPU_TRACE_TAG(tag, job)	do {} while(0);
+#define _STARPU_TRACE_TAG_DEPS(a, b)	do {} while(0);
+#define _STARPU_TRACE_TASK_DEPS(a, b)		do {} while(0);
+#define _STARPU_TRACE_GHOST_TASK_DEPS(a, b)	do {} while(0);
+#define _STARPU_TRACE_TASK_DONE(a)		do {} while(0);
+#define _STARPU_TRACE_TAG_DONE(a)		do {} while(0);
+#define _STARPU_TRACE_DATA_COPY(a, b, c)		do {} while(0);
+#define _STARPU_TRACE_START_DRIVER_COPY(a,b,c,d)	do {} while(0);
+#define _STARPU_TRACE_END_DRIVER_COPY(a,b,c,d)	do {} while(0);
+#define _STARPU_TRACE_START_DRIVER_COPY_ASYNC(a,b)	do {} while(0);
+#define _STARPU_TRACE_END_DRIVER_COPY_ASYNC(a,b)	do {} while(0);
+#define _STARPU_TRACE_WORK_STEALING(a, b)	do {} while(0);
+#define _STARPU_TRACE_WORKER_DEINIT_START	do {} while(0);
+#define _STARPU_TRACE_WORKER_DEINIT_END(a)	do {} while(0);
+#define _STARPU_TRACE_WORKER_SLEEP_START		do {} while(0);
+#define _STARPU_TRACE_WORKER_SLEEP_END		do {} while(0);
+#define _STARPU_TRACE_USER_DEFINED_START		do {} while(0);
+#define _STARPU_TRACE_USER_DEFINED_END		do {} while(0);
+#define _STARPU_TRACE_START_ALLOC(memnode)	do {} while(0);
+#define _STARPU_TRACE_END_ALLOC(memnode)		do {} while(0);
+#define _STARPU_TRACE_START_ALLOC_REUSE(a)	do {} while(0);
+#define _STARPU_TRACE_END_ALLOC_REUSE(a)		do {} while(0);
+#define _STARPU_TRACE_START_MEMRECLAIM(memnode)	do {} while(0);
+#define _STARPU_TRACE_END_MEMRECLAIM(memnode)	do {} while(0);
+#define _STARPU_TRACE_START_PROGRESS(memnode)	do {} while(0);
+#define _STARPU_TRACE_END_PROGRESS(memnode)	do {} while(0);
+#define _STARPU_TRACE_USER_EVENT(code)		do {} while(0);
+#define _STARPU_TRACE_SET_PROFILING(status)	do {} while(0);
+#define _STARPU_TRACE_TASK_WAIT_FOR_ALL		do {} while(0);
 
 #endif // STARPU_USE_FXT
 

+ 26 - 13
src/common/hash.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009-2011  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,39 +15,52 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
-#include <common/hash.h>
+#include <starpu_hash.h>
 #include <stdlib.h>
 #include <string.h>
 
-#define CRC32C_POLY_BE 0x1EDC6F41
+#define _STARPU_CRC32C_POLY_BE 0x1EDC6F41
 
-static inline uint32_t __attribute__ ((pure)) crc32_be_8(uint8_t inputbyte, uint32_t inputcrc)
+/* Feed one byte into a big-endian (MSB-first) CRC register: the byte is XORed
+ * into the top 8 bits of inputcrc, then the register is shifted left eight
+ * times, XORing in _STARPU_CRC32C_POLY_BE each time the bit about to be
+ * shifted out is set. Returns the updated register. */
+static inline uint32_t __attribute__ ((pure)) starpu_crc32_be_8(uint8_t inputbyte, uint32_t inputcrc)
 {
 	unsigned i;
 	uint32_t crc;
 
-	crc = inputcrc ^ (inputbyte << 24);
+	/* Widen before shifting: a bare uint8_t promotes to int, and shifting a
+	 * byte >= 0x80 left by 24 would overflow a 32-bit signed int. */
+	crc = inputcrc ^ ((uint32_t)inputbyte << 24);
 	for (i = 0; i < 8; i++)
-		crc = (crc << 1) ^ ((crc & 0x80000000) ? CRC32C_POLY_BE : 0);
+		crc = (crc << 1) ^ ((crc & 0x80000000) ? _STARPU_CRC32C_POLY_BE : 0);
 
 	return crc;
 }
 
-uint32_t _starpu_crc32_be(uint32_t input, uint32_t inputcrc)
+/* Fold the n bytes starting at input, in increasing address order, into the
+ * CRC state inputcrc and return the resulting state. With n == 0 the input
+ * pointer is never dereferenced and inputcrc is returned unchanged. */
+uint32_t starpu_crc32_be_n(void *input, size_t n, uint32_t inputcrc)
+{
+	const uint8_t *cursor = (const uint8_t *)input;
+	uint32_t state = inputcrc;
+	size_t remaining;
+
+	for (remaining = n; remaining > 0; remaining--)
+		state = starpu_crc32_be_8(*cursor++, state);
+
+	return state;
+}
+
+uint32_t starpu_crc32_be(uint32_t input, uint32_t inputcrc)
 {
 	uint8_t *p = (uint8_t *)&input;
 
 	uint32_t crc = inputcrc;
 
-	crc = crc32_be_8(p[0], crc);
-	crc = crc32_be_8(p[1], crc);
-	crc = crc32_be_8(p[2], crc);
-	crc = crc32_be_8(p[3], crc);
+	crc = starpu_crc32_be_8(p[0], crc);
+	crc = starpu_crc32_be_8(p[1], crc);
+	crc = starpu_crc32_be_8(p[2], crc);
+	crc = starpu_crc32_be_8(p[3], crc);
 
 	return crc;
 }
 
-uint32_t _starpu_crc32_string(char *str, uint32_t inputcrc)
+uint32_t starpu_crc32_string(char *str, uint32_t inputcrc)
 {
 	uint32_t hash = inputcrc;
 
@@ -56,7 +69,7 @@ uint32_t _starpu_crc32_string(char *str, uint32_t inputcrc)
 	unsigned i;
 	for (i = 0; i < len; i++)
 	{
-		hash = crc32_be_8((uint8_t)str[i], hash);
+		hash = starpu_crc32_be_8((uint8_t)str[i], hash);
 	}
 
 	return hash;

+ 0 - 33
src/common/hash.h

@@ -1,33 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#ifndef __HASH_H__
-#define __HASH_H__
-
-#include <stdint.h>
-
-/* Compute the CRC of a 32bit number seeded by the inputcrc "current state".
- * The return value should be considered as the new "current state" for future
- * CRC computation. */
-uint32_t _starpu_crc32_be(uint32_t input, uint32_t inputcrc);
-
-/* Compute the CRC of a string seeded by the inputcrc "current state".  The
- * return value should be considered as the new "current state" for future CRC
- * computation. */
-uint32_t _starpu_crc32_string(char *str, uint32_t inputcrc);
-
-#endif // __HASH_H__

+ 54 - 28
src/common/htable32.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009-2010, 2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -21,33 +21,32 @@
 #include <stdint.h>
 #include <string.h>
 
-void *_starpu_htbl_search_32(struct starpu_htbl32_node_s *htbl, uint32_t key)
+void *_starpu_htbl_search_32(struct starpu_htbl32_node *htbl, uint32_t key)
 {
 	unsigned currentbit;
-	unsigned keysize = 32;
+	unsigned keysize = sizeof(uint32_t)*8;
 
-	starpu_htbl32_node_t *current_htbl = htbl;
+	struct starpu_htbl32_node *current_htbl = htbl;
 
 	/* 000000000001111 with HTBL_NODE_SIZE 1's */
-	uint32_t mask = (1<<STARPU_HTBL32_NODE_SIZE)-1;
+	uint32_t mask = (1<<_STARPU_HTBL32_NODE_SIZE)-1;
 
-	for(currentbit = 0; currentbit < keysize; currentbit+=STARPU_HTBL32_NODE_SIZE)
+	for(currentbit = 0; currentbit < keysize; currentbit+=_STARPU_HTBL32_NODE_SIZE)
 	{
-	
-	//	printf("search : current bit = %d \n", currentbit);
+		//	printf("search : current bit = %d \n", currentbit);
 		if (STARPU_UNLIKELY(current_htbl == NULL))
 			return NULL;
 
-		/* 0000000000001111 
+		/* 0000000000001111
 		 *     | currentbit
 		 * 0000111100000000 = offloaded_mask
 		 *         |last_currentbit
 		 * */
 
-		unsigned last_currentbit = 
-			keysize - (currentbit + STARPU_HTBL32_NODE_SIZE);
+		unsigned last_currentbit =
+			keysize - (currentbit + _STARPU_HTBL32_NODE_SIZE);
 		uint32_t offloaded_mask = mask << last_currentbit;
-		unsigned current_index = 
+		unsigned current_index =
 			(key & (offloaded_mask)) >> (last_currentbit);
 
 		current_htbl = current_htbl->children[current_index];
@@ -60,45 +59,72 @@ void *_starpu_htbl_search_32(struct starpu_htbl32_node_s *htbl, uint32_t key)
  * returns the previous value of the tag, or NULL else
  */
 
-void *_starpu_htbl_insert_32(struct starpu_htbl32_node_s **htbl, uint32_t key, void *entry)
+void *_starpu_htbl_insert_32(struct starpu_htbl32_node **htbl, uint32_t key, void *entry)
 {
 	unsigned currentbit;
-	unsigned keysize = 32;
+	unsigned keysize = sizeof(uint32_t)*8;
 
-	starpu_htbl32_node_t **current_htbl_ptr = htbl;
+	struct starpu_htbl32_node **current_htbl_ptr = htbl;
 
 	/* 000000000001111 with HTBL_NODE_SIZE 1's */
-	uint32_t mask = (1<<STARPU_HTBL32_NODE_SIZE)-1;
+	uint32_t mask = (1<<_STARPU_HTBL32_NODE_SIZE)-1;
 
-	for(currentbit = 0; currentbit < keysize; currentbit+=STARPU_HTBL32_NODE_SIZE)
+	for(currentbit = 0; currentbit < keysize; currentbit+=_STARPU_HTBL32_NODE_SIZE)
 	{
 		//printf("insert : current bit = %d \n", currentbit);
-		if (*current_htbl_ptr == NULL) {
+		if (*current_htbl_ptr == NULL)
+		{
 			/* TODO pad to change that 1 into 16 ? */
-			*current_htbl_ptr = (starpu_htbl32_node_t*)calloc(sizeof(starpu_htbl32_node_t), 1);
-			assert(*current_htbl_ptr);
+			*current_htbl_ptr = (struct starpu_htbl32_node*)calloc(sizeof(struct starpu_htbl32_node), 1);
+			STARPU_ASSERT(*current_htbl_ptr);
 		}
 
-		/* 0000000000001111 
+		/* 0000000000001111
 		 *     | currentbit
 		 * 0000111100000000 = offloaded_mask
 		 *         |last_currentbit
 		 * */
 
-		unsigned last_currentbit = 
-			keysize - (currentbit + STARPU_HTBL32_NODE_SIZE);
+		unsigned last_currentbit =
+			keysize - (currentbit + _STARPU_HTBL32_NODE_SIZE);
 		uint32_t offloaded_mask = mask << last_currentbit;
-		unsigned current_index = 
+		unsigned current_index =
 			(key & (offloaded_mask)) >> (last_currentbit);
 
-		current_htbl_ptr = 
+		current_htbl_ptr =
 			&((*current_htbl_ptr)->children[current_index]);
 	}
 
-	/* current_htbl either contains NULL or a previous entry 
+	/* current_htbl either contains NULL or a previous entry
 	 * we overwrite it anyway */
 	void *old_entry = *current_htbl_ptr;
-	*current_htbl_ptr = (starpu_htbl32_node_t *) entry;
+	*current_htbl_ptr = (struct starpu_htbl32_node *) entry;
 
 	return old_entry;
 }
+
+/* Recursive worker for _starpu_htbl_destroy_32. `bit' is the number of key
+ * bits already consumed on the path from the root down to htbl. */
+static void _starpu_htbl_destroy_32_bit(struct starpu_htbl32_node *htbl, unsigned bit, void (*remove)(void*))
+{
+	const unsigned key_bits = sizeof(uint32_t)*8;
+	const int fanout = 1<<_STARPU_HTBL32_NODE_SIZE;
+	int slot;
+
+	if (htbl == NULL)
+		return;
+
+	if (bit < key_bits)
+	{
+		/* Interior node: tear down every subtree, then the node itself. */
+		for (slot = 0; slot < fanout; slot++)
+			_starpu_htbl_destroy_32_bit(htbl->children[slot], bit+_STARPU_HTBL32_NODE_SIZE, remove);
+
+		free(htbl);
+		return;
+	}
+
+	/* Whole key consumed: htbl is a stored entry, not a table node, so it
+	 * is handed to the caller's callback (if any) rather than freed here. */
+	if (remove != NULL)
+		remove(htbl);
+}
+
+/* Free every node of the table rooted at htbl, calling `remove' (when it is
+ * not NULL) on each stored entry. A NULL htbl is a no-op. */
+void _starpu_htbl_destroy_32(struct starpu_htbl32_node *htbl, void (*remove)(void*))
+{
+	_starpu_htbl_destroy_32_bit(htbl, 0, remove);
+}

+ 12 - 8
src/common/htable32.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009-2010, 2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -23,21 +23,25 @@
 #include <stdio.h>
 #include <assert.h>
 
-#define STARPU_HTBL32_NODE_SIZE	16
+#define _STARPU_HTBL32_NODE_SIZE	16
 
 /* Hierarchical table: all nodes have a 2^16 arity . */
-typedef struct starpu_htbl32_node_s {
+/* Note: this struct is used in include/starpu_perfmodel.h */
+struct starpu_htbl32_node {
 	unsigned nentries;
-	struct starpu_htbl32_node_s *children[1<<STARPU_HTBL32_NODE_SIZE];
-} starpu_htbl32_node_t;
+	struct starpu_htbl32_node *children[1<<_STARPU_HTBL32_NODE_SIZE];
+};
 
 /* Look for a 32bit key into the hierchical table. Returns the entry if
  * something is found, NULL otherwise. */
-void *_starpu_htbl_search_32(struct starpu_htbl32_node_s *htbl, uint32_t key);
+void *_starpu_htbl_search_32(struct starpu_htbl32_node *htbl, uint32_t key);
 
 /* Insert an entry indexed by the 32bit key into the hierarchical table.
  * Returns the entry that was previously associated to that key if any, NULL
  * otherwise. */
-void *_starpu_htbl_insert_32(struct starpu_htbl32_node_s **htbl, uint32_t key, void *entry);
+void *_starpu_htbl_insert_32(struct starpu_htbl32_node **htbl, uint32_t key, void *entry);
+
+/* Delete the content of the table, `remove' being called on each element */
+void _starpu_htbl_destroy_32(struct starpu_htbl32_node *htbl, void (*remove)(void*));
 
 #endif // __GENERIC_HTABLE_H__

+ 73 - 66
src/common/list.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -16,67 +16,79 @@
  */
 
 /** @file
- * @brief Listes doublement chainées automatiques
+ * @brief Listes doublement chainées automatiques
  */
 
 
 /** @remarks list how-to
  * *********************************************************
  * LIST_TYPE(FOO, contenu);
- *  - déclare les types suivants
- *      + pour les cellules : FOO_t
- *      + pour les listes : FOO_list_t
- *      + pour les itérateurs : FOO_itor_t
- *  - déclare les accesseurs suivants :
- *     * création d'une cellule 
+ *  - déclare les types suivants
+ *      + pour les cellules : FOO
+ *      + pour les listes : FOO_list
+ *      + pour les itérateurs : FOO
+ *  - déclare les accesseurs suivants :
+ *     * création d'une cellule 
  *   FOO_t      FOO_new(void);  
  *     * suppression d'une cellule
  *   void       FOO_delete(FOO_t); 
- *     * création d'une liste (vide)
+ *     * création d'une liste (vide)
  *   FOO_list_t FOO_list_new(void);
  *     * suppression d'une liste
  *   void       FOO_list_delete(FOO_list_t);
  *     * teste si une liste est vide
  *   int        FOO_list_empty(FOO_list_t);
- *     * retire un élément de la liste
+ *     * retire un élément de la liste
  *   void       FOO_list_erase(FOO_list_t, FOO_t);
- *     * ajoute une élément en queue de liste
+ *     * ajoute une élément en queue de liste
  *   void       FOO_list_push_back(FOO_list_t, FOO_t);
- *     * ajoute un élément en tête de list
+ *     * ajoute un élément en tête de list
  *   void       FOO_list_push_front(FOO_list_t, FOO_t);
- *     * retire l'élément en queue de liste
+ *     * ajoute la deuxième liste à la fin de la première liste
+ *   void       FOO_list_push_list_back(FOO_list_t, FOO_list_t);
+ *     * ajoute la première liste au début de la deuxième liste
+ *   void       FOO_list_push_list_front(FOO_list_t, FOO_list_t);
+ *     * retire l'élément en queue de liste
  *   FOO_t      FOO_list_pop_back(FOO_list_t);
- *     * retire l'élement en tête de liste
+ *     * retire l'élement en tête de liste
  *   FOO_t      FOO_list_pop_front(FOO_list_t);
- *     * retourne l'élément en queue de liste
+ *     * retourne l'élément en queue de liste
  *   FOO_t      FOO_list_back(FOO_list_t);
- *     * retourne l'élement en tête de liste
+ *     * retourne l'élement en tête de liste
  *   FOO_t      FOO_list_front(FOO_list_t);
- *     * vérifie si la liste chainée est cohérente
+ *     * vérifie si la liste chainée est cohérente
  *   int	FOO_list_check(FOO_list_t);
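+ *     * returns an iterator on the first element (NULL if the list is empty)
+ *   FOO_t      FOO_list_begin(FOO_list_t);
+ *     * returns the past-the-end iterator (always NULL)
+ *   FOO_t      FOO_list_end(FOO_list_t);
+ *     * returns the element following the given one (NULL after the last one)
+ *   FOO_t      FOO_list_next(FOO_t)
+ *     * returns the number of elements, counted by walking the whole list
+ *   int        FOO_list_size(FOO_list_t)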
  * *********************************************************
  * Exemples d'utilisation :
- *  - au départ, on a :
+ *  - au départ, on a :
  *    struct ma_structure_s
  *    {
  *      int a;
  *      int b;
  *    };
- *  - on veut en faire une liste. On remplace la déclaration par :
+ *  - on veut en faire une liste. On remplace la déclaration par :
  *    LIST_TYPE(ma_structure,
  *      int a;
  *      int b;
  *    );
- *    qui crée les types ma_structure_t et ma_structure_list_t.
+ *    qui crée les types ma_structure_t et ma_structure_list_t.
  *  - allocation d'une liste vide :
  *  ma_structure_list_t l = ma_structure_list_new();
- *  - ajouter un élément 'e' en tête de la liste 'l' :
+ *  - ajouter un élément 'e' en tête de la liste 'l' :
  *  ma_structure_t e = ma_structure_new();
  *  e->a = 0;
  *  e->b = 1;
  *  ma_structure_list_push_front(l, e);
- *  - itérateur de liste :
- *  ma_structure_itor_t i;
+ *  - itérateur de liste :
+ *  ma_structure i;
  *  for(i  = ma_structure_list_begin(l);
  *      i != ma_structure_list_end(l);
  *      i  = ma_structure_list_next(i))
@@ -91,76 +103,71 @@
 /**@hideinitializer
  * Generates a new type for list of elements */
 #define LIST_TYPE(ENAME, DECL) \
-  LIST_DECLARE_TYPE(ENAME) \
   LIST_CREATE_TYPE(ENAME, DECL)
 
 /**@hideinitializer
- * Forward type declaration for lists */
-#define LIST_DECLARE_TYPE(ENAME) \
-  /** automatic type: ENAME##_list_t is a list of ENAME##_t */ \
-  typedef struct ENAME##_list_s* ENAME##_list_t; \
-  /** automatic type: defines ENAME##_t */ \
-  typedef struct ENAME##_s* ENAME##_t; \
-  /** automatic type: ENAME##_itor_t is an iterator on lists of ENAME##_t */ \
-  typedef ENAME##_t ENAME##_itor_t;
-
-/**@hideinitializer
  * The effective type declaration for lists */
 #define LIST_CREATE_TYPE(ENAME, DECL) \
-  /** from automatic type: ENAME##_t */ \
-  struct ENAME##_s \
+  /** from automatic type: struct ENAME */ \
+  struct ENAME \
   { \
-    struct ENAME##_s*_prev; /**< @internal previous cell */ \
-    struct ENAME##_s*_next; /**< @internal next cell */ \
+    struct ENAME *_prev; /**< @internal previous cell */ \
+    struct ENAME *_next; /**< @internal next cell */ \
     DECL \
   }; \
   /** @internal */ \
-  struct ENAME##_list_s \
+  struct ENAME##_list \
   { \
-    struct ENAME##_s* _head; /**< @internal head of the list */ \
-    struct ENAME##_s* _tail; /**< @internal tail of the list */ \
+    struct ENAME *_head; /**< @internal head of the list */ \
+    struct ENAME *_tail; /**< @internal tail of the list */ \
   }; \
-  /** @internal */static inline ENAME##_t ENAME##_new(void) \
-    { ENAME##_t e = (ENAME##_t)malloc(sizeof(struct ENAME##_s)); \
+  /** @internal */static inline struct ENAME *ENAME##_new(void) \
+    { struct ENAME *e = (struct ENAME *)malloc(sizeof(struct ENAME)); \
       e->_next = NULL; e->_prev = NULL; return e; } \
-  /** @internal */static inline void ENAME##_delete(ENAME##_t e) \
+  /** @internal */static inline void ENAME##_delete(struct ENAME *e) \
     { free(e); } \
-  /** @internal */static inline void ENAME##_list_push_front(ENAME##_list_t l, ENAME##_t e) \
+  /** @internal */static inline void ENAME##_list_push_front(struct ENAME##_list *l, struct ENAME *e) \
     { if(l->_tail == NULL) l->_tail = e; else l->_head->_prev = e; \
       e->_prev = NULL; e->_next = l->_head; l->_head = e; } \
-  /** @internal */static inline void ENAME##_list_push_back(ENAME##_list_t l, ENAME##_t e) \
+  /** @internal */static inline void ENAME##_list_push_back(struct ENAME##_list *l, struct ENAME *e) \
     { if(l->_head == NULL) l->_head = e; else l->_tail->_next = e; \
       e->_next = NULL; e->_prev = l->_tail; l->_tail = e; } \
-  /** @internal */static inline ENAME##_t ENAME##_list_front(ENAME##_list_t l) \
+  /** @internal */static inline void ENAME##_list_push_list_front(struct ENAME##_list *l1, struct ENAME##_list *l2) \
+    { if (l2->_head == NULL) { l2->_head = l1->_head; l2->_tail = l1->_tail; } \
+      else if (l1->_head != NULL) { l1->_tail->_next = l2->_head; l2->_head->_prev = l1->_tail; l2->_head = l1->_head; } } \
+  /** @internal Append every cell of l2 after the last cell of l1: l1 keeps its head (or takes l2's when l1 is empty) and its tail becomes l2's tail. l2's own _head/_tail fields are not reset. */static inline void ENAME##_list_push_list_back(struct ENAME##_list *l1, struct ENAME##_list *l2) \
+    { if(l1->_head == NULL) { l1->_head = l2->_head; l1->_tail = l2->_tail; } \
+      else if (l2->_head != NULL) { l1->_tail->_next = l2->_head; l2->_head->_prev = l1->_tail; l1->_tail = l2->_tail; } } \
+  /** @internal */static inline struct ENAME *ENAME##_list_front(struct ENAME##_list *l) \
     { return l->_head; } \
-  /** @internal */static inline ENAME##_t ENAME##_list_back(ENAME##_list_t l) \
+  /** @internal */static inline struct ENAME *ENAME##_list_back(struct ENAME##_list *l) \
     { return l->_tail; } \
-  /** @internal */static inline ENAME##_list_t ENAME##_list_new(void) \
-    { ENAME##_list_t l; l=(ENAME##_list_t)malloc(sizeof(struct ENAME##_list_s)); \
+  /** @internal */static inline struct ENAME##_list *ENAME##_list_new(void) \
+    { struct ENAME##_list *l; l=(struct ENAME##_list *)malloc(sizeof(struct ENAME##_list)); \
       l->_head=NULL; l->_tail=l->_head; return l; } \
-  /** @internal */static inline int ENAME##_list_empty(ENAME##_list_t l) \
+  /** @internal */static inline int ENAME##_list_empty(struct ENAME##_list *l) \
     { return (l->_head == NULL); } \
-  /** @internal */static inline void ENAME##_list_delete(ENAME##_list_t l) \
+  /** @internal */static inline void ENAME##_list_delete(struct ENAME##_list *l) \
     { free(l); } \
-  /** @internal */static inline void ENAME##_list_erase(ENAME##_list_t l, ENAME##_t c) \
-    { ENAME##_t p = c->_prev; if(p) p->_next = c->_next; else l->_head = c->_next; \
+  /** @internal */static inline void ENAME##_list_erase(struct ENAME##_list *l, struct ENAME *c) \
+    { struct ENAME *p = c->_prev; if(p) p->_next = c->_next; else l->_head = c->_next; \
       if(c->_next) c->_next->_prev = p; else l->_tail = p; } \
-  /** @internal */static inline ENAME##_t ENAME##_list_pop_front(ENAME##_list_t l) \
-    { ENAME##_t e = ENAME##_list_front(l); \
+  /** @internal */static inline struct ENAME *ENAME##_list_pop_front(struct ENAME##_list *l) \
+    { struct ENAME *e = ENAME##_list_front(l); \
       ENAME##_list_erase(l, e); return e; } \
-  /** @internal */static inline ENAME##_t ENAME##_list_pop_back(ENAME##_list_t l) \
-    { ENAME##_t e = ENAME##_list_back(l); \
+  /** @internal */static inline struct ENAME *ENAME##_list_pop_back(struct ENAME##_list *l) \
+    { struct ENAME *e = ENAME##_list_back(l); \
       ENAME##_list_erase(l, e); return e; } \
-  /** @internal */static inline ENAME##_itor_t ENAME##_list_begin(ENAME##_list_t l) \
+  /** @internal */static inline struct ENAME *ENAME##_list_begin(struct ENAME##_list *l) \
     { return l->_head; } \
-  /** @internal */static inline ENAME##_itor_t ENAME##_list_end(ENAME##_list_t l __attribute__ ((unused))) \
+  /** @internal */static inline struct ENAME *ENAME##_list_end(struct ENAME##_list *l __attribute__ ((unused))) \
     { return NULL; } \
-  /** @internal */static inline ENAME##_itor_t ENAME##_list_next(ENAME##_itor_t i) \
+  /** @internal */static inline struct ENAME *ENAME##_list_next(struct ENAME *i) \
     { return i->_next; } \
-  /** @internal */static inline int ENAME##_list_size(ENAME##_list_t l) \
-    { ENAME##_itor_t i=l->_head; int k=0; while(i!=NULL){k++;i=i->_next;} return k; } \
-  /** @internal */static inline int ENAME##_list_check(ENAME##_list_t l) \
-    { ENAME##_itor_t i=l->_head; while(i) \
+  /** @internal */static inline int ENAME##_list_size(struct ENAME##_list *l) \
+    { struct ENAME *i=l->_head; int k=0; while(i!=NULL){k++;i=i->_next;} return k; } \
+  /** @internal */static inline int ENAME##_list_check(struct ENAME##_list *l) \
+    { struct ENAME *i=l->_head; while(i) \
     { if ((i->_next == NULL) && i != l->_tail) return 0; \
       if (i->_next == i) return 0; \
       i=i->_next;} return 1; }

+ 50 - 39
src/common/rwlock.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -17,24 +17,26 @@
 
 /**
  * A dummy implementation of a rw_lock using spinlocks ...
- */ 
+ */
 
 #include "rwlock.h"
 
-static void _take_busy_lock(starpu_rw_lock_t *lock)
+static void _starpu_take_busy_lock(struct _starpu_rw_lock *lock)
 {
 	uint32_t prev;
-	do {
+	do
+	{
 		prev = STARPU_TEST_AND_SET(&lock->busy, 1);
-	} while (prev);
+	}
+	while (prev);
 }
 
-static void _release_busy_lock(starpu_rw_lock_t *lock)
+static void _starpu_release_busy_lock(struct _starpu_rw_lock *lock)
 {
 	STARPU_RELEASE(&lock->busy);
 }
 
-void _starpu_init_rw_lock(starpu_rw_lock_t *lock)
+void _starpu_init_rw_lock(struct _starpu_rw_lock *lock)
 {
 	STARPU_ASSERT(lock);
 
@@ -44,44 +46,46 @@ void _starpu_init_rw_lock(starpu_rw_lock_t *lock)
 }
 
 
-int _starpu_take_rw_lock_write_try(starpu_rw_lock_t *lock)
+int _starpu_take_rw_lock_write_try(struct _starpu_rw_lock *lock)
 {
-	_take_busy_lock(lock);
-	
+	_starpu_take_busy_lock(lock);
+
 	if (lock->readercnt > 0 || lock->writer)
 	{
 		/* fail to take the lock */
-		_release_busy_lock(lock);
+		_starpu_release_busy_lock(lock);
 		return -1;
 	}
-	else {
+	else
+	{
 		STARPU_ASSERT(lock->readercnt == 0);
 		STARPU_ASSERT(lock->writer == 0);
 
 		/* no one was either writing nor reading */
 		lock->writer = 1;
-		_release_busy_lock(lock);
+		_starpu_release_busy_lock(lock);
 		return 0;
 	}
 }
 
-int _starpu_take_rw_lock_read_try(starpu_rw_lock_t *lock)
+int _starpu_take_rw_lock_read_try(struct _starpu_rw_lock *lock)
 {
-	_take_busy_lock(lock);
+	_starpu_take_busy_lock(lock);
 
 	if (lock->writer)
 	{
 		/* there is a writer ... */
-		_release_busy_lock(lock);
+		_starpu_release_busy_lock(lock);
 		return -1;
 	}
-	else {
+	else
+	{
 		STARPU_ASSERT(lock->writer == 0);
 
 		/* no one is writing */
 		/* XXX check wrap arounds ... */
 		lock->readercnt++;
-		_release_busy_lock(lock);
+		_starpu_release_busy_lock(lock);
 
 		return 0;
 	}
@@ -89,64 +93,71 @@ int _starpu_take_rw_lock_read_try(starpu_rw_lock_t *lock)
 
 
 
-void _starpu_take_rw_lock_write(starpu_rw_lock_t *lock)
+void _starpu_take_rw_lock_write(struct _starpu_rw_lock *lock)
 {
-	do {
-		_take_busy_lock(lock);
-		
+	do
+	{
+		_starpu_take_busy_lock(lock);
+
 		if (lock->readercnt > 0 || lock->writer)
 		{
 			/* fail to take the lock */
-			_release_busy_lock(lock);
+			_starpu_release_busy_lock(lock);
 		}
-		else {
+		else
+		{
 			STARPU_ASSERT(lock->readercnt == 0);
 			STARPU_ASSERT(lock->writer == 0);
-	
+
 			/* no one was either writing nor reading */
 			lock->writer = 1;
-			_release_busy_lock(lock);
+			_starpu_release_busy_lock(lock);
 			return;
 		}
-	} while (1);
+	}
+	while (1);
 }
 
-void _starpu_take_rw_lock_read(starpu_rw_lock_t *lock)
+void _starpu_take_rw_lock_read(struct _starpu_rw_lock *lock)
 {
-	do {
-		_take_busy_lock(lock);
+	do
+	{
+		_starpu_take_busy_lock(lock);
 
 		if (lock->writer)
 		{
 			/* there is a writer ... */
-			_release_busy_lock(lock);
+			_starpu_release_busy_lock(lock);
 		}
-		else {
+		else
+		{
 			STARPU_ASSERT(lock->writer == 0);
 
 			/* no one is writing */
 			/* XXX check wrap arounds ... */
 			lock->readercnt++;
-			_release_busy_lock(lock);
+			_starpu_release_busy_lock(lock);
 
 			return;
 		}
-	} while (1);
+	}
+	while (1);
 }
 
-void _starpu_release_rw_lock(starpu_rw_lock_t *lock)
+void _starpu_release_rw_lock(struct _starpu_rw_lock *lock)
 {
-	_take_busy_lock(lock);
+	_starpu_take_busy_lock(lock);
 	/* either writer or reader (exactly one !) */
-	if (lock->writer) 
+	if (lock->writer)
 	{
 		STARPU_ASSERT(lock->readercnt == 0);
 		lock->writer = 0;
 	}
-	else {
+	else
+	{
 		/* reading mode */
 		STARPU_ASSERT(lock->writer == 0);
 		lock->readercnt--;
 	}
-	_release_busy_lock(lock);
+	_starpu_release_busy_lock(lock);
 }

+ 9 - 9
src/common/rwlock.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -22,30 +22,30 @@
 #include <starpu.h>
 
 /* Dummy implementation of a RW-lock using a spinlock. */
-typedef struct starpu_rw_lock_s {
+struct _starpu_rw_lock {
 	uint32_t busy;
 	uint8_t writer;
 	uint16_t readercnt;
-} starpu_rw_lock_t;
+};
 
 /* Initialize the RW-lock */
-void _starpu_init_rw_lock(starpu_rw_lock_t *lock);
+void _starpu_init_rw_lock(struct _starpu_rw_lock *lock);
 
 /* Grab the RW-lock in a write mode */
-void _starpu_take_rw_lock_write(starpu_rw_lock_t *lock);
+void _starpu_take_rw_lock_write(struct _starpu_rw_lock *lock);
 
 /* Grab the RW-lock in a read mode */
-void _starpu_take_rw_lock_read(starpu_rw_lock_t *lock);
+void _starpu_take_rw_lock_read(struct _starpu_rw_lock *lock);
 
 /* Try to grab the RW-lock in a write mode. Returns 0 in case of success, -1
  * otherwise. */
-int _starpu_take_rw_lock_write_try(starpu_rw_lock_t *lock);
+int _starpu_take_rw_lock_write_try(struct _starpu_rw_lock *lock);
 
 /* Try to grab the RW-lock in a read mode. Returns 0 in case of success, -1
  * otherwise. */
-int _starpu_take_rw_lock_read_try(starpu_rw_lock_t *lock);
+int _starpu_take_rw_lock_read_try(struct _starpu_rw_lock *lock);
 
 /* Unlock the RW-lock. */
-void _starpu_release_rw_lock(starpu_rw_lock_t *lock);
+void _starpu_release_rw_lock(struct _starpu_rw_lock *lock);
 
 #endif

+ 10 - 7
src/common/starpu_spinlock.c

@@ -17,9 +17,10 @@
 
 #include <common/starpu_spinlock.h>
 #include <common/config.h>
+#include <common/utils.h>
 #include <starpu_util.h>
 
-int _starpu_spin_init(starpu_spinlock_t *lock)
+int _starpu_spin_init(struct _starpu_spinlock *lock)
 {
 #ifdef STARPU_SPINLOCK_CHECK
 //	memcpy(&lock->errcheck_lock, PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP, sizeof(PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP));
@@ -44,7 +45,7 @@ int _starpu_spin_init(starpu_spinlock_t *lock)
 #endif
 }
 
-int _starpu_spin_destroy(starpu_spinlock_t *lock STARPU_ATTRIBUTE_UNUSED)
+int _starpu_spin_destroy(struct _starpu_spinlock *lock STARPU_ATTRIBUTE_UNUSED)
 {
 #ifdef STARPU_SPINLOCK_CHECK
 	pthread_mutexattr_destroy(&lock->errcheck_attr);
@@ -61,7 +62,7 @@ int _starpu_spin_destroy(starpu_spinlock_t *lock STARPU_ATTRIBUTE_UNUSED)
 #endif
 }
 
-int _starpu_spin_lock(starpu_spinlock_t *lock)
+int _starpu_spin_lock(struct _starpu_spinlock *lock)
 {
 #ifdef STARPU_SPINLOCK_CHECK
 	int ret = pthread_mutex_lock(&lock->errcheck_lock);
@@ -74,15 +75,17 @@ int _starpu_spin_lock(starpu_spinlock_t *lock)
 	return ret;
 #else
 	uint32_t prev;
-	do {
+	do
+	{
 		prev = STARPU_TEST_AND_SET(&lock->taken, 1);
-	} while (prev);
+	}
+	while (prev);
 	return 0;
 #endif
 #endif
 }
 
-int _starpu_spin_trylock(starpu_spinlock_t *lock)
+int _starpu_spin_trylock(struct _starpu_spinlock *lock)
 {
 #ifdef STARPU_SPINLOCK_CHECK
 	int ret = pthread_mutex_trylock(&lock->errcheck_lock);
@@ -101,7 +104,7 @@ int _starpu_spin_trylock(starpu_spinlock_t *lock)
 #endif
 }
 
-int _starpu_spin_unlock(starpu_spinlock_t *lock STARPU_ATTRIBUTE_UNUSED)
+int _starpu_spin_unlock(struct _starpu_spinlock *lock STARPU_ATTRIBUTE_UNUSED)
 {
 #ifdef STARPU_SPINLOCK_CHECK
 	int ret = pthread_mutex_unlock(&lock->errcheck_lock);

+ 10 - 13
src/common/starpu_spinlock.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -23,11 +23,8 @@
 #include <common/utils.h>
 #include <common/config.h>
 
-//#ifndef STARPU_SPINLOCK_CHECK
-//#define STARPU_SPINLOCK_CHECK	1
-//#endif
-
-typedef struct starpu_spinlock_s {
+struct _starpu_spinlock
+{
 #ifdef STARPU_SPINLOCK_CHECK
 	pthread_mutexattr_t errcheck_attr;
 	pthread_mutex_t errcheck_lock;
@@ -39,13 +36,13 @@ typedef struct starpu_spinlock_s {
 	uint32_t taken __attribute__ ((aligned(16)));
 #endif
 #endif
-} starpu_spinlock_t;
+};
 
-int _starpu_spin_init(starpu_spinlock_t *lock);
-int _starpu_spin_destroy(starpu_spinlock_t *lock);
+int _starpu_spin_init(struct _starpu_spinlock *lock);
+int _starpu_spin_destroy(struct _starpu_spinlock *lock);
 
-int _starpu_spin_lock(starpu_spinlock_t *lock);
-int _starpu_spin_trylock(starpu_spinlock_t *lock);
-int _starpu_spin_unlock(starpu_spinlock_t *lock);
+int _starpu_spin_lock(struct _starpu_spinlock *lock);
+int _starpu_spin_trylock(struct _starpu_spinlock *lock);
+int _starpu_spin_unlock(struct _starpu_spinlock *lock);
 
 #endif // __STARPU_SPINLOCK_H__

+ 45 - 40
src/common/timing.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -21,7 +21,7 @@
 #include <profiling/profiling.h>
 #include <common/timing.h>
 
-#ifdef HAVE_CLOCK_GETTIME
+#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
 #include <time.h>
 #ifndef _POSIX_C_SOURCE
 /* for clock_gettime */
@@ -34,26 +34,31 @@
 #endif
 #endif
 
-static struct timespec reference_start_time_ts;
+static struct timespec _starpu_reference_start_time_ts;
 
 /* Modern CPUs' clocks are usually not synchronized so we use a monotonic clock
  * to have consistent timing measurements. The CLOCK_MONOTONIC_RAW clock is not
  * subject to NTP adjustments, but is not available on all systems (in that
  * case we use the CLOCK_MONOTONIC clock instead). */
-static void __starpu_clock_gettime(struct timespec *ts) {
+static void _starpu_clock_readtime(struct timespec *ts)
+{
 #ifdef CLOCK_MONOTONIC_RAW
 	static int raw_supported = 0;
-	switch (raw_supported) {
+	switch (raw_supported)
+	{
 	case -1:
 		break;
 	case 1:
 		clock_gettime(CLOCK_MONOTONIC_RAW, ts);
 		return;
 	case 0:
-		if (clock_gettime(CLOCK_MONOTONIC_RAW, ts)) {
+		if (clock_gettime(CLOCK_MONOTONIC_RAW, ts))
+		{
 			raw_supported = -1;
 			break;
-		} else {
+		}
+		else
+		{
 			raw_supported = 1;
 			return;
 		}
@@ -64,24 +69,24 @@ static void __starpu_clock_gettime(struct timespec *ts) {
 
 void _starpu_timing_init(void)
 {
-	__starpu_clock_gettime(&reference_start_time_ts);
+	_starpu_clock_readtime(&_starpu_reference_start_time_ts); /* raw reading: _starpu_clock_gettime() returns time relative to this very reference */
 }
 
-void starpu_clock_gettime(struct timespec *ts)
+void _starpu_clock_gettime(struct timespec *ts)
 {
 	struct timespec absolute_ts;
 
 	/* Read the current time */
-	__starpu_clock_gettime(&absolute_ts);
+	_starpu_clock_readtime(&absolute_ts);
 
 	/* Compute the relative time since initialization */
-	starpu_timespec_sub(&absolute_ts, &reference_start_time_ts, ts);
+	starpu_timespec_sub(&absolute_ts, &_starpu_reference_start_time_ts, ts);
 }
 
-#else // !HAVE_CLOCK_GETTIME
+#else // !(HAVE_CLOCK_GETTIME && CLOCK_MONOTONIC)
 
 #if defined(__i386__) || defined(__pentium__) || defined(__pentiumpro__) || defined(__i586__) || defined(__i686__) || defined(__k6__) || defined(__k7__) || defined(__x86_64__)
-typedef union starpu_u_tick
+union starpu_u_tick
 {
   uint64_t tick;
 
@@ -91,64 +96,64 @@ typedef union starpu_u_tick
     uint32_t high;
   }
   sub;
-} starpu_tick_t;
+};
 
 #define STARPU_GET_TICK(t) __asm__ volatile("rdtsc" : "=a" ((t).sub.low), "=d" ((t).sub.high))
-#define TICK_RAW_DIFF(t1, t2) ((t2).tick - (t1).tick)
-#define TICK_DIFF(t1, t2) (TICK_RAW_DIFF(t1, t2) - residual)
+#define STARPU_TICK_RAW_DIFF(t1, t2) ((t2).tick - (t1).tick)
+#define STARPU_TICK_DIFF(t1, t2) (STARPU_TICK_RAW_DIFF(t1, t2) - _starpu_residual)
 
-static starpu_tick_t reference_start_tick;
-static double scale = 0.0;
-static unsigned long long residual = 0;
+static union starpu_u_tick _starpu_reference_start_tick;
+static double _starpu_scale = 0.0;
+static unsigned long long _starpu_residual = 0;
 
-static int inited = 0;
+static int _starpu_inited = 0;
 
 void _starpu_timing_init(void)
 {
-  static starpu_tick_t t1, t2;
+  static union starpu_u_tick t1, t2;
   int i;
 
-  if (inited) return;
+  if (_starpu_inited) return;
+
+  _starpu_residual = (unsigned long long)1 << 63;
 
-  residual = (unsigned long long)1 << 63;
-  
   for(i = 0; i < 20; i++)
     {
       STARPU_GET_TICK(t1);
       STARPU_GET_TICK(t2);
-      residual = STARPU_MIN(residual, TICK_RAW_DIFF(t1, t2));
+      _starpu_residual = STARPU_MIN(_starpu_residual, STARPU_TICK_RAW_DIFF(t1, t2));
     }
-  
+
   {
     struct timeval tv1,tv2;
-    
+
     STARPU_GET_TICK(t1);
     gettimeofday(&tv1,0);
     usleep(500000);
     STARPU_GET_TICK(t2);
     gettimeofday(&tv2,0);
-    scale = ((tv2.tv_sec*1e6 + tv2.tv_usec) -
-	     (tv1.tv_sec*1e6 + tv1.tv_usec)) / 
-      (double)(TICK_DIFF(t1, t2));
+    _starpu_scale = ((tv2.tv_sec*1e6 + tv2.tv_usec) -
+		     (tv1.tv_sec*1e6 + tv1.tv_usec)) /
+      (double)(STARPU_TICK_DIFF(t1, t2));
   }
 
-  STARPU_GET_TICK(reference_start_tick);
+  STARPU_GET_TICK(_starpu_reference_start_tick);
 
-  inited = 1;
+  _starpu_inited = 1;
 }
 
-void starpu_clock_gettime(struct timespec *ts)
+void _starpu_clock_gettime(struct timespec *ts)
 {
-	starpu_tick_t tick_now;
+	union starpu_u_tick tick_now;
 
 	STARPU_GET_TICK(tick_now);
 
-	uint64_t elapsed_ticks = TICK_DIFF(reference_start_tick, tick_now);
+	uint64_t elapsed_ticks = STARPU_TICK_DIFF(_starpu_reference_start_tick, tick_now);
 
 	/* We convert this number into nano-seconds so that we can fill the
 	 * timespec structure. */
-	uint64_t elapsed_ns = (uint64_t)(((double)elapsed_ticks)*(scale*1000.0));
-	
+	uint64_t elapsed_ns = (uint64_t)(((double)elapsed_ticks)*(_starpu_scale*1000.0));
+
 	long tv_nsec = (elapsed_ns % 1000000000);
 	time_t tv_sec = (elapsed_ns / 1000000000);
 
@@ -162,7 +167,7 @@ void _starpu_timing_init(void)
 {
 }
 
-void starpu_clock_gettime(struct timespec *ts)
+void _starpu_clock_gettime(struct timespec *ts)
 {
-	timerclear(ts);
+	ts->tv_sec = ts->tv_nsec = 0; /* timerclear() is for struct timeval (it writes tv_usec) */
 }
@@ -173,7 +178,7 @@ void starpu_clock_gettime(struct timespec *ts)
 double starpu_timing_timespec_delay_us(struct timespec *start, struct timespec *end)
 {
 	struct timespec diff;
-	
+
 	starpu_timespec_sub(end, start, &diff);
 
 	double us = (diff.tv_sec*1e6) + (diff.tv_nsec*1e-3);
@@ -189,7 +194,7 @@ double starpu_timing_timespec_to_us(struct timespec *ts)
 double starpu_timing_now(void)
 {
 	struct timespec now;
-	starpu_clock_gettime(&now);
+	_starpu_clock_gettime(&now);
 
 	return starpu_timing_timespec_to_us(&now);
 }

+ 2 - 3
src/common/timing.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -30,8 +30,7 @@
 #include <starpu.h>
 
 void _starpu_timing_init(void);
-void starpu_clock_gettime(struct timespec *ts);
-double starpu_timing_now(void);
+void _starpu_clock_gettime(struct timespec *ts);
 
 #endif /* TIMING_H */
 

+ 9 - 5
src/common/utils.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -19,6 +19,7 @@
 #include <common/config.h>
 #include <common/utils.h>
 #include <libgen.h>
+#include <errno.h>
 
 #ifdef __MINGW32__
 #include <io.h>
@@ -30,6 +31,7 @@
 
 int _starpu_mkpath(const char *s, mode_t mode)
 {
+	int olderrno;
 	char *q, *r = NULL, *path = NULL, *up = NULL;
 	int rv;
 
@@ -59,15 +61,17 @@ int _starpu_mkpath(const char *s, mode_t mode)
 
 	if ((mkdir(path, mode) == -1) && (errno != EEXIST))
 		rv = -1;
-	else 
+	else
 		rv = 0;
-	
+
 out:
+	olderrno = errno;
 	if (up)
 		free(up);
 
 	free(q);
 	free(path);
+	errno = olderrno;
 	return rv;
 }
 
@@ -77,7 +81,7 @@ int _starpu_check_mutex_deadlock(pthread_mutex_t *mutex)
 	ret = pthread_mutex_trylock(mutex);
 	if (!ret)
 	{
-		pthread_mutex_unlock(mutex);
+		_STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
 		return 0;
 	}
 

+ 61 - 21
src/common/utils.h

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -25,6 +25,7 @@
 #include <pthread.h>
 #include <common/barrier.h>
 #include <stdlib.h>
+#include <math.h>
 
 #ifdef STARPU_VERBOSE
 #  define _STARPU_DEBUG(fmt, args ...) do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, "[starpu][%s] " fmt ,__func__ ,##args); fflush(stderr); }} while(0)
@@ -50,31 +51,70 @@
 	} while (0)
 
 
+#define _STARPU_IS_ZERO(a) (fpclassify(a) == FP_ZERO)
+
 int _starpu_mkpath(const char *s, mode_t mode);
 int _starpu_check_mutex_deadlock(pthread_mutex_t *mutex);
 
 /* If FILE is currently on a comment line, eat it.  */
 void _starpu_drop_comments(FILE *f);
 
-#define PTHREAD_MUTEX_INIT(mutex, attr) { int p_ret = pthread_mutex_init((mutex), (attr)); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_mutex_init: %s\n", strerror(p_ret)); STARPU_ABORT(); }}
-#define PTHREAD_MUTEX_DESTROY(mutex) { int p_ret = pthread_mutex_destroy(mutex); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_mutex_destroy: %s\n", strerror(p_ret)); STARPU_ABORT(); }}
-#define PTHREAD_MUTEX_LOCK(mutex) { int p_ret = pthread_mutex_lock(mutex); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_mutex_lock : %s\n", strerror(p_ret)); STARPU_ABORT(); }}
-#define PTHREAD_MUTEX_UNLOCK(mutex) { int p_ret = pthread_mutex_unlock(mutex); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_mutex_unlock : %s\n", strerror(p_ret)); STARPU_ABORT(); }}
-
-#define PTHREAD_RWLOCK_INIT(rwlock, attr) { int p_ret = pthread_rwlock_init((rwlock), (attr)); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_rwlock_init : %s\n", strerror(p_ret)); STARPU_ABORT();}}
-#define PTHREAD_RWLOCK_RDLOCK(rwlock) { int p_ret = pthread_rwlock_rdlock(rwlock); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_rwlock_rdlock : %s\n", strerror(p_ret)); STARPU_ABORT();}}
-#define PTHREAD_RWLOCK_WRLOCK(rwlock) { int p_ret = pthread_rwlock_wrlock(rwlock); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_rwlock_wrlock : %s\n", strerror(p_ret)); STARPU_ABORT();}}
-#define PTHREAD_RWLOCK_UNLOCK(rwlock) { int p_ret = pthread_rwlock_unlock(rwlock); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_rwlock_unlock : %s\n", strerror(p_ret)); STARPU_ABORT();}}
-#define PTHREAD_RWLOCK_DESTROY(rwlock) { int p_ret = pthread_rwlock_destroy(rwlock); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_rwlock_destroy : %s\n", strerror(p_ret)); STARPU_ABORT();}}
-
-#define PTHREAD_COND_INIT(cond, attr) { int p_ret = pthread_cond_init((cond), (attr)); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_cond_init : %s\n", strerror(p_ret)); STARPU_ABORT();}}
-#define PTHREAD_COND_DESTROY(cond) { int p_ret = pthread_cond_destroy(cond); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_cond_destroy : %s\n", strerror(p_ret)); STARPU_ABORT();}}
-#define PTHREAD_COND_SIGNAL(cond) { int p_ret = pthread_cond_signal(cond); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_cond_signal : %s\n", strerror(p_ret)); STARPU_ABORT();}}
-#define PTHREAD_COND_BROADCAST(cond) { int p_ret = pthread_cond_broadcast(cond); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_cond_broadcast : %s\n", strerror(p_ret)); STARPU_ABORT();}}
-#define PTHREAD_COND_WAIT(cond, mutex) { int p_ret = pthread_cond_wait((cond), (mutex)); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_cond_wait : %s\n", strerror(p_ret)); STARPU_ABORT();}}
-
-#define PTHREAD_BARRIER_INIT(barrier, attr, count) { int p_ret = pthread_barrier_init((barrier), (attr), (count)); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_barrier_init : %s\n", strerror(p_ret)); STARPU_ABORT();}}
-#define PTHREAD_BARRIER_DESTROY(barrier) { int p_ret = pthread_barrier_destroy((barrier)); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_barrier_destroy : %s\n", strerror(p_ret)); STARPU_ABORT();}}
-#define PTHREAD_BARRIER_WAIT(barrier) { int p_ret = pthread_barrier_wait(barrier); if (STARPU_UNLIKELY(!((p_ret == 0) || (p_ret == PTHREAD_BARRIER_SERIAL_THREAD)))) { fprintf(stderr, "pthread_barrier_wait : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+#define _STARPU_PTHREAD_MUTEX_INIT(mutex, attr) {                              \
+	int p_ret = pthread_mutex_init((mutex), (attr));                       \
+	if (STARPU_UNLIKELY(p_ret)) {                                          \
+		fprintf(stderr,                                                \
+			"%s:%d pthread_mutex_init: %s\n",                      \
+			__FILE__, __LINE__, strerror(p_ret));                  \
+		STARPU_ABORT();                                                \
+	}                                                                      \
+}
+#define _STARPU_PTHREAD_MUTEX_DESTROY(mutex) {                                 \
+	int p_ret = pthread_mutex_destroy(mutex);                              \
+	if (STARPU_UNLIKELY(p_ret)) {                                          \
+		fprintf(stderr,                                                \
+			"%s:%d pthread_mutex_destroy: %s\n",                   \
+			__FILE__, __LINE__, strerror(p_ret));                  \
+		STARPU_ABORT();                                                \
+	}                                                                      \
+}
+#define _STARPU_PTHREAD_MUTEX_LOCK(mutex) {                                    \
+	int p_ret = pthread_mutex_lock(mutex);                                 \
+	if (STARPU_UNLIKELY(p_ret)) {                                          \
+		fprintf(stderr,                                                \
+			"%s:%d pthread_mutex_lock : %s\n",                     \
+			__FILE__, __LINE__, strerror(p_ret));                  \
+		STARPU_ABORT();                                                \
+	}                                                                      \
+}
+
+#define _STARPU_PTHREAD_MUTEX_UNLOCK(mutex) {                                  \
+	int p_ret = pthread_mutex_unlock(mutex);                               \
+	if (STARPU_UNLIKELY(p_ret)) {                                          \
+		fprintf(stderr,                                                \
+			"%s:%d pthread_mutex_unlock : %s\n",                   \
+			__FILE__, __LINE__, strerror(p_ret));                  \
+		STARPU_ABORT();                                                \
+	}                                                                      \
+}
+
+#define _STARPU_PTHREAD_RWLOCK_INIT(rwlock, attr) { int p_ret = pthread_rwlock_init((rwlock), (attr)); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_rwlock_init : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+#define _STARPU_PTHREAD_RWLOCK_RDLOCK(rwlock) { int p_ret = pthread_rwlock_rdlock(rwlock); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_rwlock_rdlock : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+#define _STARPU_PTHREAD_RWLOCK_WRLOCK(rwlock) { int p_ret = pthread_rwlock_wrlock(rwlock); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_rwlock_wrlock : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+#define _STARPU_PTHREAD_RWLOCK_UNLOCK(rwlock) { int p_ret = pthread_rwlock_unlock(rwlock); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_rwlock_unlock : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+#define _STARPU_PTHREAD_RWLOCK_DESTROY(rwlock) { int p_ret = pthread_rwlock_destroy(rwlock); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_rwlock_destroy : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+
+#define _STARPU_PTHREAD_COND_INIT(cond, attr) { int p_ret = pthread_cond_init((cond), (attr)); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_cond_init : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+#define _STARPU_PTHREAD_COND_DESTROY(cond) { int p_ret = pthread_cond_destroy(cond); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_cond_destroy : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+#define _STARPU_PTHREAD_COND_SIGNAL(cond) { int p_ret = pthread_cond_signal(cond); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_cond_signal : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+#define _STARPU_PTHREAD_COND_BROADCAST(cond) { int p_ret = pthread_cond_broadcast(cond); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_cond_broadcast : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+#define _STARPU_PTHREAD_COND_WAIT(cond, mutex) { int p_ret = pthread_cond_wait((cond), (mutex)); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_cond_wait : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+
+#define _STARPU_PTHREAD_BARRIER_INIT(barrier, attr, count) { int p_ret = pthread_barrier_init((barrier), (attr), (count)); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_barrier_init : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+#define _STARPU_PTHREAD_BARRIER_DESTROY(barrier) { int p_ret = pthread_barrier_destroy((barrier)); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_barrier_destroy : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+#define _STARPU_PTHREAD_BARRIER_WAIT(barrier) { int p_ret = pthread_barrier_wait(barrier); if (STARPU_UNLIKELY(!((p_ret == 0) || (p_ret == PTHREAD_BARRIER_SERIAL_THREAD)))) { fprintf(stderr, "pthread_barrier_wait : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+
+#define _STARPU_PTHREAD_SPIN_DESTROY(lock) { int p_ret = pthread_spin_destroy(lock); if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_spin_destroy : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+#define _STARPU_PTHREAD_SPIN_LOCK(lock) { int p_ret = pthread_spin_lock(lock);  if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_spin_lock : %s\n", strerror(p_ret)); STARPU_ABORT();}}
+#define _STARPU_PTHREAD_SPIN_UNLOCK(lock) { int p_ret = pthread_spin_unlock(lock);  if (STARPU_UNLIKELY(p_ret)) { fprintf(stderr, "pthread_spin_unlock : %s\n", strerror(p_ret)); STARPU_ABORT();}}
 
 #endif // __COMMON_UTILS_H__

+ 19 - 7
src/debug/latency.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -20,7 +20,7 @@
 #include <common/config.h>
 #include <datawizard/coherency.h>
 
-void _starpu_benchmark_ping_pong(starpu_data_handle handle,
+void _starpu_benchmark_ping_pong(starpu_data_handle_t handle,
 			unsigned node0, unsigned node1, unsigned niter)
 {
 	/* We assume that no one is using that handle !! */
@@ -29,12 +29,24 @@ void _starpu_benchmark_ping_pong(starpu_data_handle handle,
 	{
 		int ret;
 
-		struct starpu_data_replicate_s *replicate_0 = &handle->per_node[node0];
-		ret = _starpu_fetch_data_on_node(handle, replicate_0, STARPU_RW, 0, NULL, NULL);
+		_starpu_spin_lock(&handle->header_lock);
+		handle->refcnt++;
+		handle->busy_count++;
+		_starpu_spin_unlock(&handle->header_lock);
+
+		struct _starpu_data_replicate *replicate_0 = &handle->per_node[node0];
+		ret = _starpu_fetch_data_on_node(handle, replicate_0, STARPU_RW, 0, 0, NULL, NULL);
 		STARPU_ASSERT(!ret);
+		_starpu_release_data_on_node(handle, 0, replicate_0);
+
+		_starpu_spin_lock(&handle->header_lock);
+		handle->refcnt++;
+		handle->busy_count++;
+		_starpu_spin_unlock(&handle->header_lock);
 
-		struct starpu_data_replicate_s *replicate_1 = &handle->per_node[node1];
-		ret = _starpu_fetch_data_on_node(handle, replicate_1, STARPU_RW, 0, NULL, NULL);
+		struct _starpu_data_replicate *replicate_1 = &handle->per_node[node1];
+		ret = _starpu_fetch_data_on_node(handle, replicate_1, STARPU_RW, 0, 0, NULL, NULL);
 		STARPU_ASSERT(!ret);
+		_starpu_release_data_on_node(handle, 0, replicate_1);
 	}
 }

+ 3 - 2
src/debug/starpu_debug_helpers.h

@@ -22,11 +22,12 @@
 #include <starpu_util.h>
 
 #ifdef __cplusplus
-extern "C" {
+extern "C"
+{
 #endif
 
 /* Perform a ping pong between the two memory nodes */
-void _starpu_benchmark_ping_pong(starpu_data_handle handle, unsigned node0, unsigned node1, unsigned niter);
+void _starpu_benchmark_ping_pong(starpu_data_handle_t handle, unsigned node0, unsigned node1, unsigned niter);
 
 /* Display the size of different data structures */
 void _starpu_debug_display_structures_size(void);

+ 10 - 10
src/debug/structures_size.c

@@ -25,14 +25,14 @@ void _starpu_debug_display_structures_size(void)
 {
 	fprintf(stderr, "struct starpu_task\t\t%u bytes\t(%x)\n",
 			(unsigned) sizeof(struct starpu_task), (unsigned) sizeof(struct starpu_task));
-	fprintf(stderr, "struct starpu_job_s\t\t%u bytes\t(%x)\n",
-			(unsigned) sizeof(struct starpu_job_s), (unsigned) sizeof(struct starpu_job_s));
-	fprintf(stderr, "struct starpu_data_state_t\t%u bytes\t(%x)\n",
-			(unsigned) sizeof(struct starpu_data_state_t), (unsigned) sizeof(struct starpu_data_state_t));
-	fprintf(stderr, "struct starpu_tag_s\t\t%u bytes\t(%x)\n",
-			(unsigned) sizeof(struct starpu_tag_s), (unsigned) sizeof(struct starpu_tag_s));
-	fprintf(stderr, "struct starpu_cg_s\t\t%u bytes\t(%x)\n",
-			(unsigned) sizeof(struct starpu_cg_s), (unsigned) sizeof(struct starpu_cg_s));
-	fprintf(stderr, "struct starpu_worker_s\t\t%u bytes\t(%x)\n",
-			(unsigned) sizeof(struct starpu_worker_s), (unsigned) sizeof(struct starpu_worker_s));
+	fprintf(stderr, "struct _starpu_job\t\t%u bytes\t(%x)\n",
+			(unsigned) sizeof(struct _starpu_job), (unsigned) sizeof(struct _starpu_job));
+	fprintf(stderr, "struct _starpu_data_state\t%u bytes\t(%x)\n",
+			(unsigned) sizeof(struct _starpu_data_state), (unsigned) sizeof(struct _starpu_data_state));
+	fprintf(stderr, "struct _starpu_tag\t\t%u bytes\t(%x)\n",
+			(unsigned) sizeof(struct _starpu_tag), (unsigned) sizeof(struct _starpu_tag));
+	fprintf(stderr, "struct _starpu_cg\t\t%u bytes\t(%x)\n",
+			(unsigned) sizeof(struct _starpu_cg), (unsigned) sizeof(struct _starpu_cg));
+	fprintf(stderr, "struct _starpu_worker\t\t%u bytes\t(%x)\n",
+			(unsigned) sizeof(struct _starpu_worker), (unsigned) sizeof(struct _starpu_worker));
 }

+ 209 - 161
src/debug/traces/starpu_fxt.c

@@ -20,13 +20,12 @@
 #ifdef STARPU_USE_FXT
 #include "starpu_fxt.h"
 #include <inttypes.h>
-#include <common/hash.h>
+#include <starpu_hash.h>
 
 static char *cpus_worker_colors[STARPU_NMAXWORKERS] = {"/greens9/7", "/greens9/6", "/greens9/5", "/greens9/4",  "/greens9/9", "/greens9/3",  "/greens9/2",  "/greens9/1"  };
 static char *cuda_worker_colors[STARPU_NMAXWORKERS] = {"/ylorrd9/9", "/ylorrd9/6", "/ylorrd9/3", "/ylorrd9/1", "/ylorrd9/8", "/ylorrd9/7", "/ylorrd9/4", "/ylorrd9/2",  "/ylorrd9/1"};
 static char *opencl_worker_colors[STARPU_NMAXWORKERS] = {"/blues9/9", "/blues9/6", "/blues9/3", "/blues9/1", "/blues9/8", "/blues9/7", "/blues9/4", "/blues9/2",  "/blues9/1"};
 static char *other_worker_colors[STARPU_NMAXWORKERS] = {"/greys9/9", "/greys9/8", "/greys9/7", "/greys9/6"};
-
 static char *worker_colors[STARPU_NMAXWORKERS];
 
 static unsigned opencl_index = 0;
@@ -62,22 +61,22 @@ static const char *get_worker_color(int workerid)
 static unsigned get_colour_symbol_red(char *name)
 {
-	/* choose some colour ... that's disguting yes */
+	/* choose some colour ... that's disgusting, yes */
-	uint32_t hash_symbol = _starpu_crc32_string(name, 0);
-	return (unsigned)_starpu_crc32_string("red", hash_symbol) % 1024;
+	uint32_t hash_symbol = starpu_crc32_string(name, 0);
+	return (unsigned)starpu_crc32_string("red", hash_symbol) % 1024;
 }
 
 static unsigned get_colour_symbol_green(char *name)
 {
-	/* choose some colour ... that's disguting yes */
+	/* choose some colour ... that's disgusting, yes */
-	uint32_t hash_symbol = _starpu_crc32_string(name, 0);
-	return (unsigned)_starpu_crc32_string("green", hash_symbol) % 1024;
+	uint32_t hash_symbol = starpu_crc32_string(name, 0);
+	return (unsigned)starpu_crc32_string("green", hash_symbol) % 1024;
 }
 
 static unsigned get_colour_symbol_blue(char *name)
 {
-	/* choose some colour ... that's disguting yes */
+	/* choose some colour ... that's disgusting, yes */
-	uint32_t hash_symbol = _starpu_crc32_string(name, 0);
-	return (unsigned)_starpu_crc32_string("blue", hash_symbol) % 1024;
+	uint32_t hash_symbol = starpu_crc32_string(name, 0);
+	return (unsigned)starpu_crc32_string("blue", hash_symbol) % 1024;
 }
 
 static float last_codelet_start[STARPU_NMAXWORKERS];
@@ -90,20 +89,21 @@ static double last_activity_flush_timestamp[STARPU_NMAXWORKERS];
 static double accumulated_sleep_time[STARPU_NMAXWORKERS];
 static double accumulated_exec_time[STARPU_NMAXWORKERS];
 
-LIST_TYPE(symbol_name,
+LIST_TYPE(_starpu_symbol_name,
 	char *name;
 )
 
-static symbol_name_list_t symbol_list;
+static struct _starpu_symbol_name_list *symbol_list;
 
-LIST_TYPE(communication,
+LIST_TYPE(_starpu_communication,
 	unsigned comid;
-	float comm_start;	
+	float comm_start;
 	float bandwidth;
-	unsigned node;
+	unsigned src_node;
+	unsigned dst_node;
 )
 
-static communication_list_t communication_list;
+static struct _starpu_communication_list *communication_list;
 
 /*
  * Paje trace file tools
@@ -175,7 +175,7 @@ static void update_accumulated_time(int worker, double sleep_time, double exec_t
 	 * point in our graph */
 	double elapsed = current_timestamp - last_activity_flush_timestamp[worker];
 	if (forceflush || (elapsed > ACTIVITY_PERIOD))
-	{		
+	{
 		if (activity_file)
 			fprintf(activity_file, "%d\t%f\t%f\t%f\t%f\n", worker, current_timestamp, elapsed, accumulated_exec_time[worker], accumulated_sleep_time[worker]);
 
@@ -197,7 +197,7 @@ static void handle_new_mem_node(struct fxt_ev_64 *ev, struct starpu_fxt_options
 	if (out_paje_file)
 	{
 		fprintf(out_paje_file, "7       %f	%"PRIu64"      Mn      %sp	%sMEMNODE%"PRIu64"\n", get_event_time_stamp(ev, options), ev->param[0], prefix, options->file_prefix, ev->param[0]);
-	
+
 		if (!options->no_bus)
 			fprintf(out_paje_file, "13       %f bw %sMEMNODE%"PRIu64" 0.0\n", 0.0f, prefix, ev->param[0]);
 	}
@@ -205,10 +205,10 @@ static void handle_new_mem_node(struct fxt_ev_64 *ev, struct starpu_fxt_options
 
 static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
-	/* 
+	/*
 	   arg0 : type of worker (cuda, cpu ..)
 	   arg1 : memory node
-	   arg2 : thread id 
+	   arg2 : thread id
 	*/
 	char *prefix = options->file_prefix;
 
@@ -222,22 +222,23 @@ static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 	char *kindstr = "";
 	enum starpu_perf_archtype archtype = 0;
 
-	switch (ev->param[0]) {
-		case STARPU_FUT_APPS_KEY:
+	switch (ev->param[0])
+	{
+		case _STARPU_FUT_APPS_KEY:
 			set_next_other_worker_color(workerid);
 			kindstr = "apps";
 			break;
-		case STARPU_FUT_CPU_KEY:
+		case _STARPU_FUT_CPU_KEY:
 			set_next_cpu_worker_color(workerid);
 			kindstr = "cpu";
 			archtype = STARPU_CPU_DEFAULT;
 			break;
-		case STARPU_FUT_CUDA_KEY:
+		case _STARPU_FUT_CUDA_KEY:
 			set_next_cuda_worker_color(workerid);
 			kindstr = "cuda";
 			archtype = STARPU_CUDA_DEFAULT + devid;
 			break;
-		case STARPU_FUT_OPENCL_KEY:
+		case _STARPU_FUT_OPENCL_KEY:
 			set_next_opencl_worker_color(workerid);
 			kindstr = "opencl";
 			archtype = STARPU_OPENCL_DEFAULT + devid;
@@ -293,10 +294,10 @@ static void handle_worker_deinit_end(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 
 static void create_paje_state_if_not_found(char *name, struct starpu_fxt_options *options)
 {
-	symbol_name_itor_t itor;
-	for (itor = symbol_name_list_begin(symbol_list);
-		itor != symbol_name_list_end(symbol_list);
-		itor = symbol_name_list_next(itor))
+	struct _starpu_symbol_name *itor;
+	for (itor = _starpu_symbol_name_list_begin(symbol_list);
+		itor != _starpu_symbol_name_list_end(symbol_list);
+		itor = _starpu_symbol_name_list_next(itor))
 	{
 		if (!strcmp(name, itor->name))
 		{
@@ -306,32 +307,34 @@ static void create_paje_state_if_not_found(char *name, struct starpu_fxt_options
 	}
 
 	/* it's the first time ... */
-	symbol_name_t entry = symbol_name_new();
-		entry->name = malloc(strlen(name));
-		strcpy(entry->name, name);
+	struct _starpu_symbol_name *entry = _starpu_symbol_name_new();
+	entry->name = malloc(strlen(name) + 1); /* + 1: strcpy() also writes the terminating NUL */
+	strcpy(entry->name, name);
 
-	symbol_name_list_push_front(symbol_list, entry);
+	_starpu_symbol_name_list_push_front(symbol_list, entry);
 
-	float red, green, blue;
-	/* choose some colour ... that's disguting yes */
+	/* choose some colour ... that's disgusting, yes */
 	unsigned hash_symbol_red = get_colour_symbol_red(name);
 	unsigned hash_symbol_green = get_colour_symbol_green(name);
 	unsigned hash_symbol_blue = get_colour_symbol_blue(name);
-	
+
 	uint32_t hash_sum = hash_symbol_red + hash_symbol_green + hash_symbol_blue;
-	
+
+	float red, green, blue;
 	if (options->per_task_colour)
 	{
 		red = (1.0f * hash_symbol_red) / hash_sum;
 		green = (1.0f * hash_symbol_green) / hash_sum;
 		blue = (1.0f * hash_symbol_blue) / hash_sum;
 	}
-	else {
+	else
+	{
 		/* Use the hardcoded value for execution mode */
 		red = 0.0f;
 		green = 0.6f;
 		blue = 0.4f;
 	}
+
 	/* create the Paje state */
 	if (out_paje_file)
 	{
@@ -423,7 +426,7 @@ static void handle_end_codelet_body(struct fxt_ev_64 *ev, struct starpu_fxt_opti
 	float codelet_length = (end_codelet_time - last_codelet_start[worker]);
 
 	update_accumulated_time(worker, 0.0, codelet_length, end_codelet_time, 0);
-	
+
 	if (distrib_time)
 	fprintf(distrib_time, "%s\t%s%d\t%ld\t%"PRIx32"\t%f\n", last_codelet_symbol[worker],
 				prefix, worker, codelet_size, codelet_hash, codelet_length);
@@ -456,11 +459,12 @@ static void handle_user_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *o
 	if (worker < 0)
 	{
 		if (out_paje_file)
-		fprintf(out_paje_file, "9       %f     event      %sp      %lu\n", get_event_time_stamp(ev, options), prefix, code);
+			fprintf(out_paje_file, "9       %f     event      %sp      %lu\n", get_event_time_stamp(ev, options), prefix, code);
 	}
-	else {
+	else
+	{
 		if (out_paje_file)
-		fprintf(out_paje_file, "9       %f     event      %s%"PRIu64"      %lu\n", get_event_time_stamp(ev, options), prefix, ev->param[1], code);
+			fprintf(out_paje_file, "9       %f     event      %s%"PRIu64"      %lu\n", get_event_time_stamp(ev, options), prefix, ev->param[1], code);
 	}
 }
 
@@ -472,7 +476,7 @@ static void handle_start_callback(struct fxt_ev_64 *ev, struct starpu_fxt_option
 		return;
 
 	if (out_paje_file)
-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      C\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[1] );
+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      C\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[1] );
 }
 
 static void handle_end_callback(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
@@ -483,7 +487,7 @@ static void handle_end_callback(struct fxt_ev_64 *ev, struct starpu_fxt_options
 		return;
 
 	if (out_paje_file)
-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[1] );
+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n", get_event_time_stamp(ev, options), options->file_prefix, ev->param[1] );
 }
 
 static void handle_worker_status(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *newstatus)
@@ -494,8 +498,8 @@ static void handle_worker_status(struct fxt_ev_64 *ev, struct starpu_fxt_options
 		return;
 
 	if (out_paje_file)
-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      %s\n",
-		get_event_time_stamp(ev, options), options->file_prefix, ev->param[1], newstatus);
+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      %s\n",
+			get_event_time_stamp(ev, options), options->file_prefix, ev->param[1], newstatus);
 }
 
 static double last_sleep_start[STARPU_NMAXWORKERS];
@@ -510,8 +514,8 @@ static void handle_start_sleep(struct fxt_ev_64 *ev, struct starpu_fxt_options *
 	last_sleep_start[worker] = start_sleep_time;
 
 	if (out_paje_file)
-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      Sl\n",
-				get_event_time_stamp(ev, options), options->file_prefix, ev->param[0]);
+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      Sl\n",
+			get_event_time_stamp(ev, options), options->file_prefix, ev->param[0]);
 }
 
 static void handle_end_sleep(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
@@ -523,8 +527,8 @@ static void handle_end_sleep(struct fxt_ev_64 *ev, struct starpu_fxt_options *op
 	float end_sleep_timestamp = get_event_time_stamp(ev, options);
 
 	if (out_paje_file)
-	fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n",
-				end_sleep_timestamp, options->file_prefix, ev->param[0]);
+		fprintf(out_paje_file, "10       %f	S      %s%"PRIu64"      B\n",
+			end_sleep_timestamp, options->file_prefix, ev->param[0]);
 
 	double sleep_length = end_sleep_timestamp - last_sleep_start[worker];
 
@@ -553,17 +557,14 @@ static void handle_start_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 		}
 
 		/* create a structure to store the start of the communication, this will be matched later */
-		communication_t com = communication_new();
+		struct _starpu_communication *com = _starpu_communication_new();
 		com->comid = comid;
 		com->comm_start = get_event_time_stamp(ev, options);
 
-#ifdef STARPU_DEVEL
-#warning this is wrong with peers
-#endif
-		/* that's a hack: either src or dst is non null */
-		com->node = (src + dst);
+		com->src_node = src;
+		com->dst_node = dst;
 
-		communication_list_push_back(communication_list, com);
+		_starpu_communication_list_push_back(communication_list, com);
 	}
 
 }
@@ -573,7 +574,7 @@ static void handle_end_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 	unsigned dst = ev->param[1];
 	unsigned size = ev->param[2];
 	unsigned comid = ev->param[3];
-	
+
 	char *prefix = options->file_prefix;
 
 	if (!options->no_bus)
@@ -585,10 +586,10 @@ static void handle_end_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 		}
 
 		/* look for a data transfer to match */
-		communication_itor_t itor;
-		for (itor = communication_list_begin(communication_list);
-			itor != communication_list_end(communication_list);
-			itor = communication_list_next(itor))
+		struct _starpu_communication *itor;
+		for (itor = _starpu_communication_list_begin(communication_list);
+			itor != _starpu_communication_list_end(communication_list);
+			itor = _starpu_communication_list_next(itor))
 		{
 			if (itor->comid == comid)
 			{
@@ -597,14 +598,15 @@ static void handle_end_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 
 				itor->bandwidth = bandwidth;
 
-				communication_t com = communication_new();
+				struct _starpu_communication *com = _starpu_communication_new();
 				com->comid = comid;
 				com->comm_start = get_event_time_stamp(ev, options);
 				com->bandwidth = -bandwidth;
 
-				com->node = itor->node;
+				com->src_node = itor->src_node;
+				com->dst_node = itor->dst_node;
 
-				communication_list_push_back(communication_list, com);
+				_starpu_communication_list_push_back(communication_list, com);
 
 				break;
 			}
@@ -612,6 +614,29 @@ static void handle_end_driver_copy(struct fxt_ev_64 *ev, struct starpu_fxt_optio
 	}
 }
 
+static void handle_start_driver_copy_async(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
+{
+	unsigned dst = ev->param[1];
+
+	char *prefix = options->file_prefix;
+
+	if (!options->no_bus)
+		if (out_paje_file)
+			fprintf(out_paje_file, "10       %f     MS      %sMEMNODE%u      CoA\n", get_event_time_stamp(ev, options), prefix, dst);
+
+}
+
+static void handle_end_driver_copy_async(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
+{
+	unsigned dst = ev->param[1];
+
+	char *prefix = options->file_prefix;
+
+	if (!options->no_bus)
+		if (out_paje_file)
+			fprintf(out_paje_file, "10       %f     MS      %sMEMNODE%u      Co\n", get_event_time_stamp(ev, options), prefix, dst);
+}
+
 static void handle_memnode_event(struct fxt_ev_64 *ev, struct starpu_fxt_options *options, const char *eventstr)
 {
 	unsigned memnode = ev->param[0];
@@ -649,14 +674,14 @@ static void handle_job_pop(struct fxt_ev_64 *ev, struct starpu_fxt_options *opti
 		fprintf(out_paje_file, "13       %f ntask %ssched %f\n", current_timestamp, options->file_prefix, (float)curq_size);
 
 	if (activity_file)
-	fprintf(activity_file, "cnt_ready\t%f\t%d\n", current_timestamp, curq_size);
+		fprintf(activity_file, "cnt_ready\t%f\t%d\n", current_timestamp, curq_size);
 }
 
 static
 void handle_update_task_cnt(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
 	float current_timestamp = get_event_time_stamp(ev, options);
-	unsigned long nsubmitted = ev->param[0]; 
+	unsigned long nsubmitted = ev->param[0];
 	if (activity_file)
 	fprintf(activity_file, "cnt_submitted\t%f\t%lu\n", current_timestamp, nsubmitted);
 }
@@ -666,10 +691,10 @@ static void handle_codelet_tag_deps(struct fxt_ev_64 *ev)
 	uint64_t child;
 	uint64_t father;
 
-	child = ev->param[0]; 
-	father = ev->param[1]; 
+	child = ev->param[0];
+	father = ev->param[1];
 
-	starpu_fxt_dag_add_tag_deps(child, father);
+	_starpu_fxt_dag_add_tag_deps(child, father);
 }
 
 static void handle_task_deps(struct fxt_ev_64 *ev)
@@ -678,38 +703,39 @@ static void handle_task_deps(struct fxt_ev_64 *ev)
 	unsigned long dep_succ = ev->param[1];
 
 	/* There is a dependency between both job id : dep_prev -> dep_succ */
-	starpu_fxt_dag_add_task_deps(dep_prev, dep_succ);
+	_starpu_fxt_dag_add_task_deps(dep_prev, dep_succ);
 }
 
 static void handle_task_done(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
 {
 	unsigned long job_id;
 	job_id = ev->param[0];
-	unsigned sched_ctx = ev->param[1];
 
-	unsigned long has_name = ev->param[4];
-	char *name = has_name?(char *)&ev->param[5]:"unknown";
+	unsigned long has_name = ev->param[3];
+	char *name = has_name?(char *)&ev->param[4]:"unknown";
 
         int worker;
-        worker = find_worker_id(ev->param[2]);
+        worker = find_worker_id(ev->param[1]);
 
 	const char *colour;
 	char buffer[32];
-	if (options->per_task_colour) {
+	if (options->per_task_colour)
+	{
 		snprintf(buffer, 32, "#%x%x%x",
-			get_colour_symbol_red(name)/4,
-			get_colour_symbol_green(name)/4,
-			get_colour_symbol_blue(name)/4);
+			 get_colour_symbol_red(name)/4,
+			 get_colour_symbol_green(name)/4,
+			 get_colour_symbol_blue(name)/4);
 		colour = &buffer[0];
 	}
-	else {
-		colour = (worker < 0)?"#aaaaaa":get_worker_color(worker);
+	else
+	{
+		colour= (worker < 0)?"#aaaaaa":get_worker_color(worker);
 	}
 
-	unsigned exclude_from_dag = ev->param[3];
+	unsigned exclude_from_dag = ev->param[2];
 
 	if (!exclude_from_dag)
-		starpu_fxt_dag_set_task_done(job_id, name, colour);
+		_starpu_fxt_dag_set_task_done(job_id, name, colour);
 }
 
 static void handle_tag_done(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
@@ -725,18 +751,20 @@ static void handle_tag_done(struct fxt_ev_64 *ev, struct starpu_fxt_options *opt
 
 	const char *colour;
 	char buffer[32];
-	if (options->per_task_colour) {
+	if (options->per_task_colour)
+	{
 		snprintf(buffer, 32, "%.4f,%.4f,%.4f",
-			get_colour_symbol_red(name)/1024.0,
-			get_colour_symbol_green(name)/1024.0,
-			get_colour_symbol_blue(name)/1024.0);
+			 get_colour_symbol_red(name)/1024.0,
+			 get_colour_symbol_green(name)/1024.0,
+			 get_colour_symbol_blue(name)/1024.0);
 		colour = &buffer[0];
 	}
-	else {
+	else
+	{
 		colour= (worker < 0)?"0.0,0.0,0.0":get_worker_color(worker);
 	}
 
-	starpu_fxt_dag_set_tag_done(tag_id, colour);
+	_starpu_fxt_dag_set_tag_done(tag_id, colour);
 }
 
 static void handle_mpi_barrier(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
@@ -757,7 +785,7 @@ static void handle_mpi_isend(struct fxt_ev_64 *ev, struct starpu_fxt_options *op
 	size_t size = ev->param[2];
 	float date = get_event_time_stamp(ev, options);
 
-	starpu_fxt_mpi_add_send_transfer(options->file_rank, dest, mpi_tag, size, date);
+	_starpu_fxt_mpi_add_send_transfer(options->file_rank, dest, mpi_tag, size, date);
 }
 
 static void handle_mpi_irecv_end(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
@@ -766,7 +794,7 @@ static void handle_mpi_irecv_end(struct fxt_ev_64 *ev, struct starpu_fxt_options
 	int mpi_tag = ev->param[1];
 	float date = get_event_time_stamp(ev, options);
 
-	starpu_fxt_mpi_add_recv_transfer(src, options->file_rank, mpi_tag, date);
+	_starpu_fxt_mpi_add_recv_transfer(src, options->file_rank, mpi_tag, date);
 }
 
 static void handle_set_profiling(struct fxt_ev_64 *ev, struct starpu_fxt_options *options)
@@ -779,31 +807,30 @@ static void handle_set_profiling(struct fxt_ev_64 *ev, struct starpu_fxt_options
 
 static void handle_task_wait_for_all(void)
 {
-	starpu_fxt_dag_add_sync_point();
+	_starpu_fxt_dag_add_sync_point();
 }
 
 static
 void _starpu_fxt_display_bandwidth(struct starpu_fxt_options *options)
 {
-	float current_bandwidth = 0.0;
-	float current_bandwidth_per_node[32] = {0.0};
+	float current_bandwidth_per_node[STARPU_MAXNODES] = {0.0};
 
 	char *prefix = options->file_prefix;
 
-	communication_itor_t itor;
-	for (itor = communication_list_begin(communication_list);
-		itor != communication_list_end(communication_list);
-		itor = communication_list_next(itor))
+	struct _starpu_communication*itor;
+	for (itor = _starpu_communication_list_begin(communication_list);
+		itor != _starpu_communication_list_end(communication_list);
+		itor = _starpu_communication_list_next(itor))
 	{
-		current_bandwidth += itor->bandwidth;
+		current_bandwidth_per_node[itor->src_node] +=  itor->bandwidth;
 		if (out_paje_file)
-		fprintf(out_paje_file, "13  %f bw %sMEMNODE0 %f\n",
-				itor->comm_start, prefix, current_bandwidth);
+		fprintf(out_paje_file, "13  %f bw %sMEMNODE%u %f\n",
+				itor->comm_start, prefix, itor->src_node, current_bandwidth_per_node[itor->src_node]);
 
-		current_bandwidth_per_node[itor->node] +=  itor->bandwidth;
+		current_bandwidth_per_node[itor->dst_node] +=  itor->bandwidth;
 		if (out_paje_file)
 		fprintf(out_paje_file, "13  %f bw %sMEMNODE%u %f\n",
-				itor->comm_start, prefix, itor->node, current_bandwidth_per_node[itor->node]);
+				itor->comm_start, prefix, itor->dst_node, current_bandwidth_per_node[itor->dst_node]);
 	}
 }
 
@@ -817,26 +844,28 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 	/* Open the trace file */
 	int fd_in;
 	fd_in = open(filename_in, O_RDONLY);
-	if (fd_in < 0) {
+	if (fd_in < 0)
+	{
 	        perror("open failed :");
 	        exit(-1);
 	}
 
 	static fxt_t fut;
 	fut = fxt_fdopen(fd_in);
-	if (!fut) {
+	if (!fut)
+	{
 	        perror("fxt_fdopen :");
 	        exit(-1);
 	}
-	
+
 	fxt_blockev_t block;
 	block = fxt_blockev_enter(fut);
 
 	/* create a htable to identify each worker(tid) */
 	hcreate(STARPU_NMAXWORKERS);
 
-	symbol_list = symbol_name_list_new(); 
-	communication_list = communication_list_new();
+	symbol_list = _starpu_symbol_name_list_new();
+	communication_list = _starpu_communication_list_new();
 
 	char *prefix = options->file_prefix;
 
@@ -854,148 +883,161 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 	}
 
 	struct fxt_ev_64 ev;
-	while(1) {
+	while(1)
+	{
 		int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev);
-		if (ret != FXT_EV_OK) {
+		if (ret != FXT_EV_OK)
+		{
 			break;
 		}
 
-		switch (ev.code) {
-			case STARPU_FUT_WORKER_INIT_START:
+		switch (ev.code)
+		{
+			case _STARPU_FUT_WORKER_INIT_START:
 				handle_worker_init_start(&ev, options);
 				break;
 
-			case STARPU_FUT_WORKER_INIT_END:
+			case _STARPU_FUT_WORKER_INIT_END:
 				handle_worker_init_end(&ev, options);
 				break;
 
-			case STARPU_FUT_NEW_MEM_NODE:
+			case _STARPU_FUT_NEW_MEM_NODE:
 				handle_new_mem_node(&ev, options);
 				break;
 
 			/* detect when the workers were idling or not */
-			case STARPU_FUT_START_CODELET_BODY:
+			case _STARPU_FUT_START_CODELET_BODY:
 				handle_start_codelet_body(&ev, options);
 				break;
-			case STARPU_FUT_END_CODELET_BODY:
+			case _STARPU_FUT_END_CODELET_BODY:
 				handle_end_codelet_body(&ev, options);
 				break;
 
-			case STARPU_FUT_START_CALLBACK:
+			case _STARPU_FUT_START_CALLBACK:
 				handle_start_callback(&ev, options);
 				break;
-			case STARPU_FUT_END_CALLBACK:
+			case _STARPU_FUT_END_CALLBACK:
 				handle_end_callback(&ev, options);
 				break;
 
-			case STARPU_FUT_UPDATE_TASK_CNT:
+			case _STARPU_FUT_UPDATE_TASK_CNT:
 				handle_update_task_cnt(&ev, options);
 				break;
 
 			/* monitor stack size */
-			case STARPU_FUT_JOB_PUSH:
+			case _STARPU_FUT_JOB_PUSH:
 				handle_job_push(&ev, options);
 				break;
-			case STARPU_FUT_JOB_POP:
+			case _STARPU_FUT_JOB_POP:
 				handle_job_pop(&ev, options);
 				break;
 
 			/* check the memory transfer overhead */
-			case STARPU_FUT_START_FETCH_INPUT:
+			case _STARPU_FUT_START_FETCH_INPUT:
 				handle_worker_status(&ev, options, "Fi");
 				break;
 
-			case STARPU_FUT_START_PUSH_OUTPUT:
+			case _STARPU_FUT_START_PUSH_OUTPUT:
 				handle_worker_status(&ev, options, "Po");
 				break;
 
-			case STARPU_FUT_START_PROGRESS:
+			case _STARPU_FUT_START_PROGRESS:
 				handle_worker_status(&ev, options, "P");
 				break;
 
-			case STARPU_FUT_END_FETCH_INPUT:
-			case STARPU_FUT_END_PROGRESS:
-			case STARPU_FUT_END_PUSH_OUTPUT:
+			case _STARPU_FUT_END_FETCH_INPUT:
+			case _STARPU_FUT_END_PROGRESS:
+			case _STARPU_FUT_END_PUSH_OUTPUT:
 				handle_worker_status(&ev, options, "B");
 				break;
 
-			case STARPU_FUT_WORKER_SLEEP_START:
+			case _STARPU_FUT_WORKER_SLEEP_START:
 				handle_start_sleep(&ev, options);
 				break;
 
-			case STARPU_FUT_WORKER_SLEEP_END:
+			case _STARPU_FUT_WORKER_SLEEP_END:
 				handle_end_sleep(&ev, options);
 				break;
 
-			case STARPU_FUT_TAG:
+			case _STARPU_FUT_TAG:
 				/* XXX */
 				break;
 
-			case STARPU_FUT_TAG_DEPS:
+			case _STARPU_FUT_TAG_DEPS:
 				handle_codelet_tag_deps(&ev);
 				break;
 
-			case STARPU_FUT_TASK_DEPS:
+			case _STARPU_FUT_TASK_DEPS:
 				handle_task_deps(&ev);
 				break;
 
-			case STARPU_FUT_TASK_DONE:
+			case _STARPU_FUT_TASK_DONE:
 				handle_task_done(&ev, options);
 				break;
 
-			case STARPU_FUT_TAG_DONE:
+			case _STARPU_FUT_TAG_DONE:
 				handle_tag_done(&ev, options);
 				break;
 
-			case STARPU_FUT_DATA_COPY:
+			case _STARPU_FUT_DATA_COPY:
 				if (!options->no_bus)
 				handle_data_copy();
 				break;
 
-			case STARPU_FUT_START_DRIVER_COPY:
+			case _STARPU_FUT_START_DRIVER_COPY:
 				if (!options->no_bus)
 				handle_start_driver_copy(&ev, options);
 				break;
 
-			case STARPU_FUT_END_DRIVER_COPY:
+			case _STARPU_FUT_END_DRIVER_COPY:
 				if (!options->no_bus)
 				handle_end_driver_copy(&ev, options);
 				break;
 
-			case STARPU_FUT_WORK_STEALING:
+			case _STARPU_FUT_START_DRIVER_COPY_ASYNC:
+				if (!options->no_bus)
+				handle_start_driver_copy_async(&ev, options);
+				break;
+
+			case _STARPU_FUT_END_DRIVER_COPY_ASYNC:
+				if (!options->no_bus)
+				handle_end_driver_copy_async(&ev, options);
+				break;
+
+			case _STARPU_FUT_WORK_STEALING:
 				/* XXX */
 				break;
 
-			case STARPU_FUT_WORKER_DEINIT_START:
+			case _STARPU_FUT_WORKER_DEINIT_START:
 				handle_worker_deinit_start(&ev, options);
 				break;
 
-			case STARPU_FUT_WORKER_DEINIT_END:
+			case _STARPU_FUT_WORKER_DEINIT_END:
 				handle_worker_deinit_end(&ev, options);
 				break;
 
-			case STARPU_FUT_START_ALLOC:
+			case _STARPU_FUT_START_ALLOC:
 				if (!options->no_bus)
 				handle_memnode_event(&ev, options, "A");
 				break;
 
-			case STARPU_FUT_START_ALLOC_REUSE:
+			case _STARPU_FUT_START_ALLOC_REUSE:
 				if (!options->no_bus)
 				handle_memnode_event(&ev, options, "Ar");
 				break;
 
-			case STARPU_FUT_START_MEMRECLAIM:
+			case _STARPU_FUT_START_MEMRECLAIM:
 				handle_memnode_event(&ev, options, "R");
 				break;
 
-			case STARPU_FUT_END_ALLOC:
-			case STARPU_FUT_END_ALLOC_REUSE:
-			case STARPU_FUT_END_MEMRECLAIM:
+			case _STARPU_FUT_END_ALLOC:
+			case _STARPU_FUT_END_ALLOC_REUSE:
+			case _STARPU_FUT_END_MEMRECLAIM:
 				if (!options->no_bus)
 				handle_memnode_event(&ev, options, "No");
 				break;
 
-			case STARPU_FUT_USER_EVENT:
+			case _STARPU_FUT_USER_EVENT:
 				handle_user_event(&ev, options);
 				break;
 
@@ -1011,11 +1053,11 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 				handle_mpi_irecv_end(&ev, options);
 				break;
 
-			case STARPU_FUT_SET_PROFILING:
+			case _STARPU_FUT_SET_PROFILING:
 				handle_set_profiling(&ev, options);
 				break;
 
-			case STARPU_FUT_TASK_WAIT_FOR_ALL:
+			case _STARPU_FUT_TASK_WAIT_FOR_ALL:
 				handle_task_wait_for_all();
 				break;
 
@@ -1110,9 +1152,10 @@ void starpu_fxt_paje_file_init(struct starpu_fxt_options *options)
 			exit(1);
 		}
 
-		starpu_fxt_write_paje_header(out_paje_file);
+		_starpu_fxt_write_paje_header(out_paje_file);
 	}
-	else {
+	else
+	{
 		out_paje_file = NULL;
 	}
 }
@@ -1129,18 +1172,20 @@ static uint64_t starpu_fxt_find_start_time(char *filename_in)
 	/* Open the trace file */
 	int fd_in;
 	fd_in = open(filename_in, O_RDONLY);
-	if (fd_in < 0) {
+	if (fd_in < 0)
+	{
 	        perror("open failed :");
 	        exit(-1);
 	}
 
 	static fxt_t fut;
 	fut = fxt_fdopen(fd_in);
-	if (!fut) {
+	if (!fut)
+	{
 	        perror("fxt_fdopen :");
 	        exit(-1);
 	}
-	
+
 	fxt_blockev_t block;
 	block = fxt_blockev_enter(fut);
 
@@ -1160,7 +1205,7 @@ static uint64_t starpu_fxt_find_start_time(char *filename_in)
 
 void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 {
-	starpu_fxt_dag_init(options->dag_path);
+	_starpu_fxt_dag_init(options->dag_path);
 	starpu_fxt_distrib_file_init(options);
 	starpu_fxt_activity_file_init(options);
 
@@ -1176,7 +1221,8 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 
 		starpu_fxt_parse_new_file(options->filenames[0], options);
 	}
-	else {
+	else
+	{
 		unsigned inputfile;
 
 		uint64_t offsets[64];
@@ -1191,7 +1237,7 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 		 * More generally:
 		 *	- psi_k(x) = x - offset_k
 		 */
-		
+
 		int unique_keys[64];
 		int rank_k[64];
 		uint64_t start_k[64];
@@ -1201,19 +1247,19 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 
 		unsigned found_one_sync_point = 0;
 		int key = 0;
-		unsigned display_mpi = 0; 
+		unsigned display_mpi = 0;
 
 		/* Compute all start_k */
 		for (inputfile = 0; inputfile < options->ninputfiles; inputfile++)
 		{
 			uint64_t file_start = starpu_fxt_find_start_time(options->filenames[inputfile]);
-			start_k[inputfile] = file_start; 
+			start_k[inputfile] = file_start;
 		}
 
 		/* Compute all sync_k if they exist */
 		for (inputfile = 0; inputfile < options->ninputfiles; inputfile++)
 		{
-			int ret = starpu_fxt_mpi_find_sync_point(options->filenames[inputfile],
+			int ret = _starpu_fxt_mpi_find_sync_point(options->filenames[inputfile],
 						&sync_k[inputfile],
 						&unique_keys[inputfile],
 						&rank_k[inputfile]);
@@ -1222,14 +1268,16 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 				/* There was no sync point, we assume there is no offset */
 				sync_k_exists[inputfile] = 0;
 			}
-			else {
+			else
+			{
 				if (!found_one_sync_point)
 				{
 					key = unique_keys[inputfile];
 					display_mpi = 1;
 					found_one_sync_point = 1;
 				}
-				else {
+				else
+				{
 					if (key != unique_keys[inputfile])
 					{
 						fprintf(stderr, "Warning: traces are coming from different run so we will not try to display MPI communications.\n");
@@ -1276,7 +1324,7 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 
 		/* display the MPI transfers if possible */
 		if (display_mpi)
-			starpu_fxt_display_mpi_transfers(options, rank_k, out_paje_file);
+			_starpu_fxt_display_mpi_transfers(options, rank_k, out_paje_file);
 	}
 
 	_starpu_fxt_display_bandwidth(options);
@@ -1286,7 +1334,7 @@ void starpu_fxt_generate_trace(struct starpu_fxt_options *options)
 	starpu_fxt_activity_file_close();
 	starpu_fxt_distrib_file_close(options);
 
-	starpu_fxt_dag_terminate();
+	_starpu_fxt_dag_terminate();
 
 	options->nworkers = nworkers;
 }

+ 16 - 17
src/debug/traces/starpu_fxt.h

@@ -14,8 +14,8 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
-#ifndef __STARPU_FXT_H__
-#define __STARPU_FXT_H__
+#ifndef __STARPU__FXT_H__
+#define __STARPU__FXT_H__
 
 #include <starpu.h>
 #include <starpu_config.h>
@@ -36,28 +36,27 @@
 #include <common/list.h>
 #include "../mpi/starpu_mpi_fxt.h"
 #include <starpu.h>
+#include "../../../include/starpu_fxt.h"
 
-#define FACTOR  100
-
-void starpu_fxt_dag_init(char *dag_filename);
-void starpu_fxt_dag_terminate(void);
-void starpu_fxt_dag_add_tag_deps(uint64_t child, uint64_t father);
-void starpu_fxt_dag_set_tag_done(uint64_t tag, const char *color);
-void starpu_fxt_dag_add_task_deps(unsigned long dep_prev, unsigned long dep_succ);
-void starpu_fxt_dag_set_task_done(unsigned long job_id, const char *label, const char *color);
-void starpu_fxt_dag_add_sync_point(void);
+void _starpu_fxt_dag_init(char *dag_filename);
+void _starpu_fxt_dag_terminate(void);
+void _starpu_fxt_dag_add_tag_deps(uint64_t child, uint64_t father);
+void _starpu_fxt_dag_set_tag_done(uint64_t tag, const char *color);
+void _starpu_fxt_dag_add_task_deps(unsigned long dep_prev, unsigned long dep_succ);
+void _starpu_fxt_dag_set_task_done(unsigned long job_id, const char *label, const char *color);
+void _starpu_fxt_dag_add_sync_point(void);
 
 /*
  *	MPI
  */
 
-int starpu_fxt_mpi_find_sync_point(char *filename_in, uint64_t *offset, int *key, int *rank);
-void starpu_fxt_mpi_add_send_transfer(int src, int dst, int mpi_tag, size_t size, float date);
-void starpu_fxt_mpi_add_recv_transfer(int src, int dst, int mpi_tag, float date);
-void starpu_fxt_display_mpi_transfers(struct starpu_fxt_options *options, int *ranks, FILE *out_paje_file);
+int _starpu_fxt_mpi_find_sync_point(char *filename_in, uint64_t *offset, int *key, int *rank);
+void _starpu_fxt_mpi_add_send_transfer(int src, int dst, int mpi_tag, size_t size, float date);
+void _starpu_fxt_mpi_add_recv_transfer(int src, int dst, int mpi_tag, float date);
+void _starpu_fxt_display_mpi_transfers(struct starpu_fxt_options *options, int *ranks, FILE *out_paje_file);
 
-void starpu_fxt_write_paje_header(FILE *file);
+void _starpu_fxt_write_paje_header(FILE *file);
 
 #endif // STARPU_USE_FXT
 
-#endif // __STARPU_FXT_H__
+#endif // __STARPU__FXT_H__

+ 16 - 15
src/debug/traces/starpu_fxt_dag.c

@@ -25,7 +25,7 @@
 static FILE *out_file;
 static unsigned cluster_cnt;
 
-void starpu_fxt_dag_init(char *out_path)
+void _starpu_fxt_dag_init(char *out_path)
 {
 	if (!out_path)
 	{
@@ -35,7 +35,8 @@ void starpu_fxt_dag_init(char *out_path)
 
 	/* create a new file */
 	out_file = fopen(out_path, "w+");
-	if (!out_file) {
+	if (!out_file)
+	{
 		fprintf(stderr,"error while opening %s\n", out_path);
 		perror("fopen");
 		exit(1);
@@ -51,7 +52,7 @@ void starpu_fxt_dag_init(char *out_path)
 	fprintf(out_file, "\tcolor=black;\n");
 }
 
-void starpu_fxt_dag_terminate(void)
+void _starpu_fxt_dag_terminate(void)
 {
 	if (!out_file)
 		return;
@@ -63,33 +64,33 @@ void starpu_fxt_dag_terminate(void)
 	fclose(out_file);
 }
 
-void starpu_fxt_dag_add_tag_deps(uint64_t child, uint64_t father)
+void _starpu_fxt_dag_add_tag_deps(uint64_t child, uint64_t father)
 {
 	if (out_file)
-	fprintf(out_file, "\t \"tag_%llx\"->\"tag_%llx\"\n", 
-		(unsigned long long)father, (unsigned long long)child);
+		fprintf(out_file, "\t \"tag_%llx\"->\"tag_%llx\"\n",
+			(unsigned long long)father, (unsigned long long)child);
 }
 
-void starpu_fxt_dag_add_task_deps(unsigned long dep_prev, unsigned long dep_succ)
+void _starpu_fxt_dag_add_task_deps(unsigned long dep_prev, unsigned long dep_succ)
 {
 	if (out_file)
-	fprintf(out_file, "\t \"task_%lx\"->\"task_%lx\"\n", dep_prev, dep_succ);
-} 
+		fprintf(out_file, "\t \"task_%lx\"->\"task_%lx\"\n", dep_prev, dep_succ);
+}
 
-void starpu_fxt_dag_set_tag_done(uint64_t tag, const char *color)
+void _starpu_fxt_dag_set_tag_done(uint64_t tag, const char *color)
 {
 	if (out_file)
-	fprintf(out_file, "\t \"tag_%llx\" [ style=filled, label=\"\", color=\"%s\"]\n", 
-		(unsigned long long)tag, color);
+		fprintf(out_file, "\t \"tag_%llx\" [ style=filled, label=\"\", color=\"%s\"]\n",
+			(unsigned long long)tag, color);
 }
 
-void starpu_fxt_dag_set_task_done(unsigned long job_id, const char *label, const char *color)
+void _starpu_fxt_dag_set_task_done(unsigned long job_id, const char *label, const char *color)
 {
 	if (out_file)
-	fprintf(out_file, "\t \"task_%lx\" [ style=filled, label=\"%s\", color=\"%s\"]\n", job_id, label, color);
+		fprintf(out_file, "\t \"task_%lx\" [ style=filled, label=\"%s\", color=\"%s\"]\n", job_id, label, color);
 }
 
-void starpu_fxt_dag_add_sync_point(void)
+void _starpu_fxt_dag_add_sync_point(void)
 {
 	if (!out_file)
 		return;

+ 21 - 13
src/debug/traces/starpu_fxt_mpi.c

@@ -21,7 +21,8 @@
 
 #include "starpu_fxt.h"
 
-struct mpi_transfer {
+struct mpi_transfer
+{
 	unsigned matched;
 	int other_rank; /* src for a recv, dest for a send */
 	int mpi_tag;
@@ -31,25 +32,27 @@ struct mpi_transfer {
 
 /* Returns 0 if a barrier is found, -1 otherwise. In case of success, offset is
  * filled with the timestamp of the barrier */
-int starpu_fxt_mpi_find_sync_point(char *filename_in, uint64_t *offset, int *key, int *rank)
+int _starpu_fxt_mpi_find_sync_point(char *filename_in, uint64_t *offset, int *key, int *rank)
 {
 	STARPU_ASSERT(offset);
 
 	/* Open the trace file */
 	int fd_in;
 	fd_in = open(filename_in, O_RDONLY);
-	if (fd_in < 0) {
+	if (fd_in < 0)
+	{
 	        perror("open failed :");
 	        exit(-1);
 	}
 
 	static fxt_t fut;
 	fut = fxt_fdopen(fd_in);
-	if (!fut) {
+	if (!fut)
+	{
 	        perror("fxt_fdopen :");
 	        exit(-1);
 	}
-	
+
 	fxt_blockev_t block;
 	block = fxt_blockev_enter(fut);
 
@@ -57,9 +60,11 @@ int starpu_fxt_mpi_find_sync_point(char *filename_in, uint64_t *offset, int *key
 
 	int func_ret = -1;
 	unsigned found = 0;
-	while(!found) {
+	while(!found)
+	{
 		int ret = fxt_next_ev(block, FXT_EV_TYPE_64, (struct fxt_ev *)&ev);
-		if (ret != FXT_EV_OK) {
+		if (ret != FXT_EV_OK)
+		{
 			fprintf(stderr, "no more block ...\n");
 			break;
 		}
@@ -106,7 +111,7 @@ unsigned mpi_recvs_used[64] = {0};
  * transfer, thus avoiding a quadratic complexity. */
 unsigned mpi_recvs_matched[64] = {0};
 
-void starpu_fxt_mpi_add_send_transfer(int src, int dst STARPU_ATTRIBUTE_UNUSED, int mpi_tag, size_t size, float date)
+void _starpu_fxt_mpi_add_send_transfer(int src, int dst STARPU_ATTRIBUTE_UNUSED, int mpi_tag, size_t size, float date)
 {
 	unsigned slot = mpi_sends_used[src]++;
 
@@ -116,7 +121,8 @@ void starpu_fxt_mpi_add_send_transfer(int src, int dst STARPU_ATTRIBUTE_UNUSED,
 		{
 			mpi_sends_list_size[src] *= 2;
 		}
-		else {
+		else
+		{
 			mpi_sends_list_size[src] = 1;
 		}
 
@@ -130,7 +136,7 @@ void starpu_fxt_mpi_add_send_transfer(int src, int dst STARPU_ATTRIBUTE_UNUSED,
 	mpi_sends[src][slot].date = date;
 }
 
-void starpu_fxt_mpi_add_recv_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst, int mpi_tag, float date)
+void _starpu_fxt_mpi_add_recv_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst, int mpi_tag, float date)
 {
 	unsigned slot = mpi_recvs_used[dst]++;
 
@@ -140,7 +146,8 @@ void starpu_fxt_mpi_add_recv_transfer(int src STARPU_ATTRIBUTE_UNUSED, int dst,
 		{
 			mpi_recvs_list_size[dst] *= 2;
 		}
-		else {
+		else
+		{
 			mpi_recvs_list_size[dst] = 1;
 		}
 
@@ -174,7 +181,8 @@ struct mpi_transfer *try_to_match_send_transfer(int src STARPU_ATTRIBUTE_UNUSED,
 
 			all_previous_were_matched = 0;
 		}
-		else {
+		else
+		{
 			if (all_previous_were_matched)
 			{
 				/* All previous transfers are already matched,
@@ -224,7 +232,7 @@ static void display_all_transfers_from_trace(FILE *out_paje_file, int src)
 	}
 }
 
-void starpu_fxt_display_mpi_transfers(struct starpu_fxt_options *options, int *ranks, FILE *out_paje_file)
+void _starpu_fxt_display_mpi_transfers(struct starpu_fxt_options *options, int *ranks, FILE *out_paje_file)
 {
 	unsigned inputfile;
 

+ 2 - 4
src/debug/traces/starpu_paje.c

@@ -19,7 +19,7 @@
 
 #ifdef STARPU_USE_FXT
 
-void starpu_fxt_write_paje_header(FILE *file)
+void _starpu_fxt_write_paje_header(FILE *file)
 {
 	fprintf(file, "%%EventDef	PajeDefineContainerType	1\n");
 	fprintf(file, "%%	Alias	string\n");
@@ -124,7 +124,6 @@ void starpu_fxt_write_paje_header(FILE *file)
 	fprintf(file, "%%	Key	string\n");
 	fprintf(file, "%%EndEventDef\n");
 
-
 	fprintf(file, "                                        \n \
 	1       MPIP      0       \"MPI Program\"                      	\n \
 	1       P      MPIP       \"Program\"                      	\n \
@@ -238,6 +237,7 @@ void starpu_fxt_write_paje_header(FILE *file)
 	6       Ar       MS      AllocatingReuse       \".1 .1 .8\"		\n \
 	6       R       MS      Reclaiming         \".0 .1 .4\"		\n \
 	6       Co       MS     DriverCopy         \".3 .5 .1\"		\n \
+	6       CoA      MS     DriverCopyAsync         \".1 .3 .1\"		\n \
 	6       No       MS     Nothing         \".0 .0 .0\"		\n \
 	5       MPIL     MPIP	P	P      MPIL\n \
 	5       L       P	Mn	Mn      L\n");
@@ -246,5 +246,3 @@ void starpu_fxt_write_paje_header(FILE *file)
 }
 
 #endif
-
-