Olivier Aumage 11 gadi atpakaļ
vecāks
revīzija
29fd2d0fe6

+ 2 - 0
ChangeLog

@@ -68,6 +68,8 @@ Small features:
   * The option --enable-verbose can be called with
     --enable-verbose=extra to increase the verbosity
   * Add codelet size, footprint and tag id in the paje trace.
+  * Add STARPU_TAG_ONLY, to specify a tag for traces without making StarPU
+    manage the tag.
 
 Changes:
   * Data interfaces (variable, vector, matrix and block) now define

+ 2 - 0
configure.ac

@@ -278,6 +278,8 @@ AC_CHECK_FUNC([sched_yield], [AC_DEFINE([STARPU_HAVE_SCHED_YIELD], [1], [Define
 
 AC_CHECK_HEADERS([aio.h])
 
+AC_CHECK_FUNCS([mkstemps])
+
 # This defines HAVE_SYNC_VAL_COMPARE_AND_SWAP
 STARPU_CHECK_SYNC_VAL_COMPARE_AND_SWAP
 

+ 5 - 0
doc/doxygen/chapters/13offline_performance_tools.doxy

@@ -132,6 +132,11 @@ $ starpu_fxt_tool -i filename1 -i filename2
 By default, all tasks are displayed using a green color. To display tasks with
 varying colors, pass option <c>-c</c> to <c>starpu_fxt_tool</c>.
 
+To identify tasks precisely, the application can set the ::tag_id field of the
+tasks (or use STARPU_TAG_ONLY when using starpu_task_insert), and with a recent
+enough version of vite (>= r1430) and the <c>--enable-paje-codelet-details</c>
+configure option, the value of the tag will show up in the trace.
+
 Traces can also be inspected by hand by using the tool <c>fxt_print</c>, for instance:
 
 \verbatim

+ 6 - 1
doc/doxygen/chapters/api/insert_task.doxy

@@ -28,7 +28,7 @@ specifying the worker on which to execute the task (as specified by
 starpu_task::execute_on_a_specific_worker)
 <li> the specific values ::STARPU_VALUE, ::STARPU_CALLBACK,
 ::STARPU_CALLBACK_ARG, ::STARPU_CALLBACK_WITH_ARG, ::STARPU_PRIORITY,
-::STARPU_TAG, ::STARPU_FLOPS, ::STARPU_SCHED_CTX followed by the
+::STARPU_TAG, ::STARPU_TAG_ONLY, ::STARPU_FLOPS, ::STARPU_SCHED_CTX followed by the
 appropriated objects as defined elsewhere.
 </ul>
 
@@ -84,6 +84,11 @@ the task (as specified by starpu_task::execute_on_a_specific_worker)
 \ingroup API_Insert_Task
 this macro is used when calling starpu_task_insert(), and must be followed by a tag.
 
+\def STARPU_TAG_ONLY
+\ingroup API_Insert_Task
+this macro is used when calling starpu_task_insert(), and must be followed by a tag.
+It sets ::tag_id, but leaves ::use_tag as 0.
+
 \def STARPU_FLOPS
 \ingroup API_Insert_Task
 this macro is used when calling starpu_task_insert(), and must

+ 2 - 0
examples/axpy/axpy.c

@@ -183,6 +183,8 @@ int main(int argc, char **argv)
 		task->handles[0] = starpu_data_get_sub_data(_handle_x, 1, b);
 		task->handles[1] = starpu_data_get_sub_data(_handle_y, 1, b);
 
+		task->tag_id = b;
+
 		ret = starpu_task_submit(task);
 		if (ret == -ENODEV)
 		{

+ 2 - 2
examples/binary/binary.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2011, 2013  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010-2011, 2013-2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -68,7 +68,7 @@ int compute(char *file_name, int load_as_file)
 
 	for (i = 0; i < niter; i++)
 	{
-		ret = starpu_task_insert(&cl, STARPU_RW, float_array_handle, 0);
+		ret = starpu_task_insert(&cl, STARPU_RW, float_array_handle, STARPU_TAG_ONLY, (starpu_tag_t) i, 0);
 		if (STARPU_UNLIKELY(ret == -ENODEV))
 		{
 			FPRINTF(stderr, "No worker may execute this task\n");

+ 5 - 0
examples/cg/cg_kernels.c

@@ -298,6 +298,7 @@ int dot_kernel(starpu_data_handle_t v1,
 					 use_reduction?STARPU_REDUX:STARPU_RW, s,
 					 STARPU_R, starpu_data_get_sub_data(v1, 1, b),
 					 STARPU_R, starpu_data_get_sub_data(v2, 1, b),
+					 STARPU_TAG_ONLY, (starpu_tag_t) b,
 					 0);
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 	}
@@ -443,6 +444,7 @@ int gemv_kernel(starpu_data_handle_t v1,
 		ret = starpu_task_insert(&scal_kernel_cl,
 					 STARPU_RW, starpu_data_get_sub_data(v1, 1, b2),
 					 STARPU_VALUE, &p1, sizeof(p1),
+					 STARPU_TAG_ONLY, (starpu_tag_t) b2,
 					 0);
 		if (ret == -ENODEV) return ret;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
@@ -459,6 +461,7 @@ int gemv_kernel(starpu_data_handle_t v1,
 						 STARPU_R,	starpu_data_get_sub_data(v2, 1, b1),
 						 STARPU_VALUE,	&one,	sizeof(one),
 						 STARPU_VALUE,	&p2,	sizeof(p2),
+						 STARPU_TAG_ONLY, (starpu_tag_t) (b2 * nblocks + b1),
 						 0);
 			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 		}
@@ -538,6 +541,7 @@ int scal_axpy_kernel(starpu_data_handle_t v1, TYPE p1,
 					 STARPU_R,  starpu_data_get_sub_data(v2, 1, b),
 					 STARPU_VALUE, &p1, sizeof(p1),
 					 STARPU_VALUE, &p2, sizeof(p2),
+					 STARPU_TAG_ONLY, (starpu_tag_t) b,
 					 0);
 		if (ret == -ENODEV) return ret;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
@@ -611,6 +615,7 @@ int axpy_kernel(starpu_data_handle_t v1,
 					 STARPU_RW, starpu_data_get_sub_data(v1, 1, b),
 					 STARPU_R,  starpu_data_get_sub_data(v2, 1, b),
 					 STARPU_VALUE, &p1, sizeof(p1),
+					 STARPU_TAG_ONLY, (starpu_tag_t) b,
 					 0);
 		if (ret == -ENODEV) return ret;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

+ 3 - 0
examples/cholesky/cholesky_implicit.c

@@ -57,6 +57,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 					 STARPU_RW, sdatakk,
 					 STARPU_CALLBACK, (k == 3*nblocks/4)?callback_turn_spmd_on:NULL,
 					 STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
+					 STARPU_TAG_ONLY, TAG11(k),
 					 0);
 		if (ret == -ENODEV) return 77;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
@@ -70,6 +71,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 						 STARPU_R, sdatakk,
 						 STARPU_RW, sdatakj,
 						 STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
+						 STARPU_TAG_ONLY, TAG21(k,j),
 						 0);
 			if (ret == -ENODEV) return 77;
 			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
@@ -87,6 +89,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 								 STARPU_R, sdatakj,
 								 STARPU_RW | STARPU_COMMUTE, sdataij,
 								 STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
+								 STARPU_TAG_ONLY, TAG22(k,i,j),
 								 0);
 					if (ret == -ENODEV) return 77;
 					STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

+ 2 - 1
examples/cpp/incrementer_cpp.cpp

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2011, 2013  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010-2011, 2013-2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  * Copyright (C) 2012 inria
  *
@@ -75,6 +75,7 @@ int main(int argc, char **argv)
 	{
 		ret = starpu_task_insert(&cl,
 					 STARPU_RW, float_array_handle,
+					 STARPU_TAG_ONLY, (starpu_tag_t) i,
 					 0);
                 if (STARPU_UNLIKELY(ret == -ENODEV))
                 {

+ 1 - 10
examples/lu/xlu.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010-2011, 2014  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
@@ -19,15 +19,6 @@
 #include "xlu.h"
 #include "xlu_kernels.h"
 
-#define TAG11(k)	((starpu_tag_t)( (1ULL<<60) | (unsigned long long)(k)))
-#define TAG12(k,i)	((starpu_tag_t)(((2ULL<<60) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(i))))
-#define TAG21(k,j)	((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(j))))
-#define TAG22(k,i,j)	((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) 	\
-					| ((unsigned long long)(i)<<16)	\
-					| (unsigned long long)(j))))
-
 static unsigned no_prio = 0;
 
 

+ 12 - 1
examples/lu/xlu.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010-2011, 2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -22,6 +22,17 @@
 #include <starpu.h>
 #include <common/blas.h>
 
+#define TAG11(k)	((starpu_tag_t)( (1ULL<<60) | (unsigned long long)(k)))
+#define TAG12(k,i)	((starpu_tag_t)(((2ULL<<60) | (((unsigned long long)(k))<<32)	\
+					| (unsigned long long)(i))))
+#define TAG21(k,j)	((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32)	\
+					| (unsigned long long)(j))))
+#define TAG22(k,i,j)	((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) 	\
+					| ((unsigned long long)(i)<<16)	\
+					| (unsigned long long)(j))))
+#define PIVOT(k,i)	((starpu_tag_t)(((5ULL<<60) | (((unsigned long long)(k))<<32)	\
+					| (unsigned long long)(i))))
+
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 
 #define BLAS3_FLOP(n1,n2,n3)    \

+ 9 - 1
examples/lu/xlu_implicit.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2011, 2014  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
@@ -30,6 +30,8 @@ static int create_task_11(starpu_data_handle_t dataA, unsigned k)
 	/* which sub-data is manipulated ? */
 	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
 
+	task->tag_id = TAG11(k);
+
 	/* this is an important task */
 	if (!no_prio)
 		task->priority = STARPU_MAX_PRIO;
@@ -49,6 +51,8 @@ static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
 	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
 	task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k);
 
+	task->tag_id = TAG12(k,j);
+
 	if (!no_prio && (j == k+1))
 		task->priority = STARPU_MAX_PRIO;
 
@@ -68,6 +72,8 @@ static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
 	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
 	task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, i);
 
+	task->tag_id = TAG21(k,i);
+
 	if (!no_prio && (i == k+1))
 		task->priority = STARPU_MAX_PRIO;
 
@@ -88,6 +94,8 @@ static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, un
 	task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k);
 	task->handles[2] = starpu_data_get_sub_data(dataA, 2, j, i);
 
+	task->tag_id = TAG22(k,i,j);
+
 	if (!no_prio &&  (i == k + 1) && (j == k +1) )
 		task->priority = STARPU_MAX_PRIO;
 

+ 15 - 1
examples/lu/xlu_implicit_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2012, 2014  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
@@ -39,6 +39,8 @@ static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 	/* which sub-data is manipulated ? */
 	task->handles[0] = get_block(dataAp, nblocks, k, i);
 
+	task->tag_id = PIVOT(k, i);
+
 	task->cl_arg = &piv_description[k];
 
 	/* this is an important task */
@@ -65,6 +67,8 @@ static int create_task_11_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 	/* which sub-data is manipulated ? */
 	task->handles[0] = get_block(dataAp, nblocks, k, k);
 
+	task->tag_id = TAG11(k);
+
 	/* this is an important task */
 	if (!no_prio)
 		task->priority = STARPU_MAX_PRIO;
@@ -86,6 +90,8 @@ static int create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsign
 	task->handles[0] = get_block(dataAp, nblocks, k, k);
 	task->handles[1] = get_block(dataAp, nblocks, j, k);
 
+	task->tag_id = TAG12(k,j);
+
 	if (!no_prio && (j == k+1))
 		task->priority = STARPU_MAX_PRIO;
 
@@ -106,6 +112,8 @@ static int create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsign
 	task->handles[0] = get_block(dataAp, nblocks, k, k);
 	task->handles[1] = get_block(dataAp, nblocks, k, i);
 
+	task->tag_id = TAG21(k,i);
+
 	if (!no_prio && (i == k+1))
 		task->priority = STARPU_MAX_PRIO;
 
@@ -127,6 +135,8 @@ static int create_task_22(starpu_data_handle_t *dataAp, unsigned nblocks, unsign
 	task->handles[1] = get_block(dataAp, nblocks, j, k);
 	task->handles[2] = get_block(dataAp, nblocks, j, i);
 
+	task->tag_id = TAG22(k,i,j);
+
 	if (!no_prio &&  (i == k + 1) && (j == k +1) )
 		task->priority = STARPU_MAX_PRIO;
 
@@ -237,6 +247,8 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 
 	double timing;
 	int ret = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding, &timing);
+	if (ret)
+		return ret;
 
 	FPRINTF(stderr, "Computation took (in ms)\n");
 	FPRINTF(stderr, "%2.2f\n", timing/1000);
@@ -290,6 +302,8 @@ int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, uns
 
 	double timing;
 	int ret = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding, &timing);
+	if (ret)
+		return ret;
 
 	FPRINTF(stderr, "Computation took (in ms)\n");
 	FPRINTF(stderr, "%2.2f\n", timing/1000);

+ 1 - 12
examples/lu/xlu_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2012, 2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -18,17 +18,6 @@
 #include "xlu.h"
 #include "xlu_kernels.h"
 
-#define TAG11(k)	((starpu_tag_t)( (1ULL<<60) | (unsigned long long)(k)))
-#define TAG12(k,i)	((starpu_tag_t)(((2ULL<<60) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(i))))
-#define TAG21(k,j)	((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(j))))
-#define TAG22(k,i,j)	((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) 	\
-					| ((unsigned long long)(i)<<16)	\
-					| (unsigned long long)(j))))
-#define PIVOT(k,i)	((starpu_tag_t)(((5ULL<<60) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(i))))
-
 static unsigned no_prio = 0;
 
 /*

+ 1 - 0
examples/mandelbrot/mandelbrot.c

@@ -535,6 +535,7 @@ int main(int argc, char **argv)
 						 STARPU_VALUE, &stepY, sizeof(stepY),
 						 STARPU_W, block_handles[iby],
 						 STARPU_VALUE, &pcnt, sizeof(int *),
+						 STARPU_TAG_ONLY, (starpu_tag_t) (niter*nblocks + iby),
 						 0);
 			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 		}

+ 5 - 1
examples/pipeline/pipeline.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012, 2013, 2014  Centre National de la Recherche Scientifique
- * Copyright (C) 2012  Université de Bordeaux 1
+ * Copyright (C) 2012, 2014  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -205,6 +205,7 @@ int main(void)
 		ret = starpu_task_insert(&pipeline_codelet_x,
 				STARPU_W, buffersX[l%K],
 				STARPU_VALUE, &x, sizeof(x),
+				STARPU_TAG_ONLY, (starpu_tag_t) (100*l),
 				0);
 		if (ret == -ENODEV) goto enodev;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert x");
@@ -212,6 +213,7 @@ int main(void)
 		ret = starpu_task_insert(&pipeline_codelet_x,
 				STARPU_W, buffersY[l%K],
 				STARPU_VALUE, &y, sizeof(y),
+				STARPU_TAG_ONLY, (starpu_tag_t) (100*l+1),
 				0);
 		if (ret == -ENODEV) goto enodev;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert y");
@@ -219,6 +221,7 @@ int main(void)
 		ret = starpu_task_insert(&pipeline_codelet_axpy,
 				STARPU_R, buffersX[l%K],
 				STARPU_RW, buffersY[l%K],
+				STARPU_TAG_ONLY, (starpu_tag_t) l,
 				0);
 		if (ret == -ENODEV) goto enodev;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert axpy");
@@ -226,6 +229,7 @@ int main(void)
 		ret = starpu_task_insert(&pipeline_codelet_sum,
 				STARPU_R, buffersY[l%K],
 				STARPU_CALLBACK_WITH_ARG, (void (*)(void*))sem_post, &sems[l%C],
+				STARPU_TAG_ONLY, (starpu_tag_t) l,
 				0);
 		if (ret == -ENODEV) goto enodev;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert sum");

+ 2 - 2
examples/scheduler/schedulers.sh

@@ -2,7 +2,7 @@
 
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2012  Centre National de la Recherche Scientifique
+# Copyright (C) 2012, 2014  Centre National de la Recherche Scientifique
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -25,7 +25,7 @@ check_success()
 
 [ -x ./cholesky/cholesky_tag ] || exit 77
 
-SCHEDULERS=`STARPU_SCHED="help" ./basic_examples/hello_world 2>&1 | awk '/->/ {print $1}'`
+SCHEDULERS=`STARPU_SCHED="help" ./basic_examples/hello_world 2>&1 | awk '/\t->/ {print $1}'`
 
 for sched in $SCHEDULERS
 do

+ 31 - 27
include/starpu_task.h

@@ -122,9 +122,13 @@ struct starpu_codelet
 
 struct starpu_task
 {
+	const char *name;
+
 	struct starpu_codelet *cl;
 
+	/* TODO: remove someday, this is costly */
 	struct starpu_data_descr buffers[STARPU_NMAXBUFS] STARPU_DEPRECATED;
+
 	starpu_data_handle_t handles[STARPU_NMAXBUFS];
 	void *interfaces[STARPU_NMAXBUFS];
 
@@ -133,61 +137,61 @@ struct starpu_task
 
 	void *cl_arg;
 	size_t cl_arg_size;
-	unsigned cl_arg_free;
 
 	void (*callback_func)(void *);
 	void *callback_arg;
 	/* must StarPU release callback_arg ? - 0 by default */
-	unsigned callback_arg_free;
 
 	void (*prologue_callback_func)(void *);
 	void *prologue_callback_arg;
-	/* must StarPU release prologue_callback_arg ? - 0 by default */
-	unsigned prologue_callback_arg_free;
 
 	void (*prologue_callback_pop_func)(void *);
 	void *prologue_callback_pop_arg;
-	/* must StarPU release prologue_callback_pop_arg ? - 0 by default */
-	unsigned prologue_callback_pop_arg_free;
 
-	unsigned use_tag;
 	starpu_tag_t tag_id;
 
-	unsigned sequential_consistency;
+	unsigned cl_arg_free:1;
+	unsigned callback_arg_free:1;
+	/* must StarPU release prologue_callback_arg ? - 0 by default */
+	unsigned prologue_callback_arg_free:1;
+	/* must StarPU release prologue_callback_pop_arg ? - 0 by default */
+	unsigned prologue_callback_pop_arg_free:1;
 
-	unsigned synchronous;
-	int priority;
+	unsigned use_tag:1;
+	unsigned sequential_consistency:1;
+	unsigned synchronous:1;
+	unsigned execute_on_a_specific_worker:1;
 
-	unsigned execute_on_a_specific_worker;
-	unsigned workerid;
+	unsigned detach:1;
+	unsigned destroy:1;
+	unsigned regenerate:1;
 
-	starpu_task_bundle_t bundle;
+	unsigned scheduled:1;
+
+	unsigned int mf_skip:1;
+
+	unsigned workerid;
 
-	int detach;
-	int destroy;
-	int regenerate;
+	int priority;
 
 	enum starpu_task_status status;
 
+	int magic;
+
+	unsigned sched_ctx;
+	int hypervisor_tag;
+
+	starpu_task_bundle_t bundle;
+
 	struct starpu_profiling_task_info *profiling_info;
 
+	double flops;
 	double predicted;
 	double predicted_transfer;
 
-	unsigned int mf_skip;
-
 	struct starpu_task *prev;
 	struct starpu_task *next;
 	void *starpu_private;
-	int magic;
-
-	const char *name;
-
-	unsigned sched_ctx;
-	int hypervisor_tag;
-	double flops;
-
-	unsigned scheduled;
 #ifdef STARPU_OPENMP
 	struct starpu_omp_task *omp_task;
 #endif

+ 2 - 1
include/starpu_task_util.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013  Université de Bordeaux 1
+ * Copyright (C) 2010-2014  Université de Bordeaux 1
  * Copyright (C) 2010-2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2014       INRIA
  *
@@ -49,6 +49,7 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 #define STARPU_PROLOGUE_CALLBACK_POP   (15<<18)
 #define STARPU_PROLOGUE_CALLBACK_POP_ARG (16<<18)
 #define STARPU_EXECUTE_ON_WORKER (17<<18)
+#define STARPU_TAG_ONLY          (18<<18)
 
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);
 int starpu_task_insert(struct starpu_codelet *cl, ...);

+ 12 - 0
mpi/src/starpu_mpi_task_insert.c

@@ -309,6 +309,10 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
 		{
 			(void)va_arg(varg_list_copy, double);
 		}
+		else if (arg_type==STARPU_TAG_ONLY)
+		{
+			(void)va_arg(varg_list, starpu_tag_t);
+		}
 		else if (arg_type==STARPU_TAG)
 		{
 			STARPU_ASSERT_MSG(0, "STARPU_TAG is not supported in MPI mode\n");
@@ -471,6 +475,10 @@ int _starpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, stru
 		{
 			(void)va_arg(varg_list_copy, double);
 		}
+		else if (arg_type==STARPU_TAG_ONLY)
+		{
+			(void)va_arg(varg_list, starpu_tag_t);
+		}
 		else if (arg_type==STARPU_TAG)
 		{
 			STARPU_ASSERT_MSG(0, "STARPU_TAG is not supported in MPI mode\n");
@@ -609,6 +617,10 @@ int _starpu_mpi_task_postbuild_v(MPI_Comm comm, struct starpu_codelet *codelet,
 		{
 			(void)va_arg(varg_list_copy, double);
 		}
+		else if (arg_type==STARPU_TAG_ONLY)
+		{
+			(void)va_arg(varg_list, starpu_tag_t);
+		}
 		else if (arg_type==STARPU_TAG)
 		{
 			STARPU_ASSERT_MSG(0, "STARPU_TAG is not supported in MPI mode\n");

+ 16 - 16
src/core/jobs.h

@@ -63,8 +63,8 @@ struct _starpu_data_descr {
 /* A job is the internal representation of a task. */
 LIST_TYPE(_starpu_job,
 
-	/* The implementation associated to the job */
-	unsigned nimpl;
+	/* Each job is attributed a unique id. */
+	unsigned long job_id;
 
 	/* The task associated to that job */
 	struct starpu_task *task;
@@ -95,18 +95,18 @@ LIST_TYPE(_starpu_job,
 
 	/* The value of the footprint that identifies the job may be stored in
 	 * this structure. */
-	unsigned footprint_is_computed;
 	uint32_t footprint;
+	unsigned footprint_is_computed:1;
 
 	/* Indicates whether the task associated to that job has already been
 	 * submitted to StarPU (1) or not (0) (using starpu_task_submit).
 	 * Becomes and stays 2 when the task is submitted several times.
 	 */
-	unsigned submitted;
+	unsigned submitted:2;
 
 	/* Indicates whether the task associated to this job is terminated or
 	 * not. */
-	unsigned terminated;
+	unsigned terminated:2;
 
 #ifdef STARPU_OPENMP
 	/* Job is a continuation or a regular task. */
@@ -140,25 +140,19 @@ LIST_TYPE(_starpu_job,
 
 	/* Should that task appear in the debug tools ? (eg. the DAG generated
 	 * with dot) */
-        unsigned exclude_from_dag;
+	unsigned exclude_from_dag:1;
 
 	/* Is that task internal to StarPU? */
-	unsigned internal;
-
-	/* Each job is attributed a unique id. */
-	unsigned long job_id;
+	unsigned internal:1;
 
 	/* During the reduction of a handle, StarPU may have to submit tasks to
 	 * perform the reduction itself: those task should not be stalled while
 	 * other tasks are blocked until the handle has been properly reduced,
 	 * so we need a flag to differentiate them from "normal" tasks. */
-	unsigned reduction_task;
-
-	/* Used to record codelet start time instead of using a
-	 * local variable */
-	struct timespec cl_start;
+	unsigned reduction_task:1;
 
-	struct bound_task *bound_task;
+	/* The implementation associated to the job */
+	unsigned nimpl;
 
 	/* Number of workers executing that task (>1 if the task is parallel)
 	 * */
@@ -171,6 +165,12 @@ LIST_TYPE(_starpu_job,
 	 * parallel tasks only). */
 	int active_task_alias_count;
 
+	/* Used to record codelet start time instead of using a
+	 * local variable */
+	struct timespec cl_start;
+
+	struct bound_task *bound_task;
+
 	/* Parallel workers may have to synchronize before/after the execution of a parallel task. */
 	starpu_pthread_barrier_t before_work_barrier;
 	starpu_pthread_barrier_t after_work_barrier;

+ 8 - 0
src/core/simgrid.c

@@ -204,7 +204,11 @@ void _starpu_simgrid_init()
 		char cmdline[1024];
 		FILE *in;
 		int out;
+#ifdef HAVE_MKSTEMPS
 		char template[] = "/tmp/"STARPU_MPI_AS_PREFIX"-platform-XXXXXX.xml";
+#else
+		char template[] = "/tmp/"STARPU_MPI_AS_PREFIX"-platform-XXXXXX";
+#endif
 		int ret;
 
 		STARPU_ASSERT(starpu_mpi_world_rank);
@@ -214,7 +218,11 @@ void _starpu_simgrid_init()
 		_starpu_simgrid_get_platform_path(path, sizeof(path));
 		in = fopen(path, "r");
 		STARPU_ASSERT_MSG(in, "Could not open platform file %s", path);
+#ifdef HAVE_MKSTEMPS
 		out = mkstemps(template, strlen(".xml"));
+#else
+		out = mkstemp(template);
+#endif
 
 		/* Generate modified XML platform */
 		STARPU_ASSERT_MSG(out >= 0, "Could not create temporary file like %s", template);

+ 10 - 2
src/core/tree.c

@@ -104,9 +104,17 @@ struct starpu_tree* starpu_tree_get_neighbour(struct starpu_tree *tree, struct s
 {
 	struct starpu_tree *father = node == NULL ? tree : node->father;
 	
-	int i;
-	for(i = 0; i < father->arity; i++)
+	int i, st, n;
+
+	for(st = 0; st < father->arity; st++)
+	{
+		if(father->nodes[st] == node)
+			break;
+	}
+
+	for(n = 0; n < father->arity; n++)
 	{
+		i = (st+n)%father->arity;
 		if(father->nodes[i] != node)
 		{
 			if(father->nodes[i]->arity == 0)

+ 7 - 3
src/util/starpu_task_insert_utils.c

@@ -126,8 +126,7 @@ size_t _starpu_task_insert_get_arg_size(va_list varg_list)
 		{
 			(void)va_arg(varg_list, double);
 		}
-
-		else if (arg_type==STARPU_TAG)
+		else if (arg_type==STARPU_TAG || arg_type==STARPU_TAG_ONLY)
 		{
 			(void)va_arg(varg_list, starpu_tag_t);
 		}
@@ -237,7 +236,7 @@ int _starpu_codelet_pack_args(void **arg_buffer, size_t arg_buffer_size, va_list
 		{
 			(void)va_arg(varg_list, double);
 		}
-		else if (arg_type==STARPU_TAG)
+		else if (arg_type==STARPU_TAG || arg_type==STARPU_TAG_ONLY)
 		{
 			(void)va_arg(varg_list, starpu_tag_t);
 		}
@@ -416,6 +415,11 @@ void _starpu_task_insert_create(void *arg_buffer, size_t arg_buffer_size, struct
 			(*task)->tag_id = tag;
 			(*task)->use_tag = 1;
 		}
+		else if (arg_type==STARPU_TAG_ONLY)
+		{
+			starpu_tag_t tag = va_arg(varg_list, starpu_tag_t);
+			(*task)->tag_id = tag;
+		}
 		else
 		{
 			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);

+ 3 - 16
tests/datawizard/increment_init.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010, 2012-2014  Université de Bordeaux 1
- * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012, 2014  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -28,8 +28,6 @@ static starpu_data_handle_t handle;
 #ifdef STARPU_USE_CUDA
 static void neutral_cuda_kernel(void *descr[], void *arg)
 {
-	STARPU_SKIP_IF_VALGRIND;
-
 	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 
 	/* This is a dummy technique of course */
@@ -42,8 +40,6 @@ static void neutral_cuda_kernel(void *descr[], void *arg)
 #ifdef STARPU_USE_OPENCL
 static void neutral_opencl_kernel(void *descr[], void *arg)
 {
-	STARPU_SKIP_IF_VALGRIND;
-
 	unsigned h_dst = 0;
 	cl_mem d_dst = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
 
@@ -59,8 +55,6 @@ static void neutral_opencl_kernel(void *descr[], void *arg)
 
 static void neutral_cpu_kernel(void *descr[], void *arg)
 {
-	STARPU_SKIP_IF_VALGRIND;
-
 	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	*dst = 0;
 }
@@ -86,8 +80,6 @@ static struct starpu_codelet neutral_cl =
 /* dummy OpenCL implementation */
 static void increment_opencl_kernel(void *descr[], void *cl_arg STARPU_ATTRIBUTE_UNUSED)
 {
-	STARPU_SKIP_IF_VALGRIND;
-
 	cl_mem d_token = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
 	unsigned h_token;
 
@@ -105,8 +97,6 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg STARPU_ATTRIBUTE
 #ifdef STARPU_USE_CUDA
 static void increment_cuda_kernel(void *descr[], void *arg)
 {
-	STARPU_SKIP_IF_VALGRIND;
-
 	unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	unsigned host_token;
 
@@ -123,8 +113,6 @@ static void increment_cuda_kernel(void *descr[], void *arg)
 
 static void increment_cpu_kernel(void *descr[], void *arg)
 {
-	STARPU_SKIP_IF_VALGRIND;
-
 	unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	*tokenptr = *tokenptr + 1;
 }
@@ -144,7 +132,7 @@ static struct starpu_codelet increment_cl =
 
 int main(int argc, char **argv)
 {
-	unsigned *pvar;
+	unsigned *pvar = NULL;
 	int ret;
 
 	ret = starpu_init(NULL);
@@ -209,6 +197,5 @@ enodev:
 
 err:
 	starpu_shutdown();
-	STARPU_RETURN(EXIT_FAILURE);
-
+	return EXIT_FAILURE;
 }