Browse Source

Add STARPU_TAG_ONLY, to specify a tag for traces without making StarPU manage the tag.

Use it in several examples.
Samuel Thibault 11 years ago
parent
commit
4705435be8

+ 2 - 0
ChangeLog

@@ -68,6 +68,8 @@ Small features:
   * The option --enable-verbose can be called with
     --enable-verbose=extra to increase the verbosity
   * Add codelet size, footprint and tag id in the paje trace.
+  * Add STARPU_TAG_ONLY, to specify a tag for traces without making StarPU
+    manage the tag.
 
 Changes:
   * Data interfaces (variable, vector, matrix and block) now define

+ 5 - 0
doc/doxygen/chapters/13offline_performance_tools.doxy

@@ -132,6 +132,11 @@ $ starpu_fxt_tool -i filename1 -i filename2
 By default, all tasks are displayed using a green color. To display tasks with
 varying colors, pass option <c>-c</c> to <c>starpu_fxt_tool</c>.
 
+To identify tasks precisely, the application can set the starpu_task::tag_id field of the
+tasks (or use ::STARPU_TAG_ONLY when using starpu_task_insert()), and with a recent
+enough version of vite (>= r1430) and the <c>--enable-paje-codelet-details</c>
+configure option, the value of the tag will show up in the trace.
+
 Traces can also be inspected by hand by using the tool <c>fxt_print</c>, for instance:
 
 \verbatim

+ 6 - 1
doc/doxygen/chapters/api/insert_task.doxy

@@ -28,7 +28,7 @@ specifying the worker on which to execute the task (as specified by
 starpu_task::execute_on_a_specific_worker)
 <li> the specific values ::STARPU_VALUE, ::STARPU_CALLBACK,
 ::STARPU_CALLBACK_ARG, ::STARPU_CALLBACK_WITH_ARG, ::STARPU_PRIORITY,
-::STARPU_TAG, ::STARPU_FLOPS, ::STARPU_SCHED_CTX followed by the
+::STARPU_TAG, ::STARPU_TAG_ONLY, ::STARPU_FLOPS, ::STARPU_SCHED_CTX followed by the
 appropriated objects as defined elsewhere.
 </ul>
 
@@ -84,6 +84,11 @@ the task (as specified by starpu_task::execute_on_a_specific_worker)
 \ingroup API_Insert_Task
 this macro is used when calling starpu_task_insert(), and must be followed by a tag.
 
+\def STARPU_TAG_ONLY
+\ingroup API_Insert_Task
+this macro is used when calling starpu_task_insert(), and must be followed by a tag.
+It sets starpu_task::tag_id, but leaves starpu_task::use_tag set to 0.
+
 \def STARPU_FLOPS
 \ingroup API_Insert_Task
 this macro is used when calling starpu_task_insert(), and must

+ 2 - 0
examples/axpy/axpy.c

@@ -183,6 +183,8 @@ int main(int argc, char **argv)
 		task->handles[0] = starpu_data_get_sub_data(_handle_x, 1, b);
 		task->handles[1] = starpu_data_get_sub_data(_handle_y, 1, b);
 
+		task->tag_id = b;
+
 		ret = starpu_task_submit(task);
 		if (ret == -ENODEV)
 		{

+ 2 - 2
examples/binary/binary.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2011, 2013  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010-2011, 2013-2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -68,7 +68,7 @@ int compute(char *file_name, int load_as_file)
 
 	for (i = 0; i < niter; i++)
 	{
-		ret = starpu_task_insert(&cl, STARPU_RW, float_array_handle, 0);
+		ret = starpu_task_insert(&cl, STARPU_RW, float_array_handle, STARPU_TAG_ONLY, (starpu_tag_t) i, 0);
 		if (STARPU_UNLIKELY(ret == -ENODEV))
 		{
 			FPRINTF(stderr, "No worker may execute this task\n");

+ 5 - 0
examples/cg/cg_kernels.c

@@ -298,6 +298,7 @@ int dot_kernel(starpu_data_handle_t v1,
 					 use_reduction?STARPU_REDUX:STARPU_RW, s,
 					 STARPU_R, starpu_data_get_sub_data(v1, 1, b),
 					 STARPU_R, starpu_data_get_sub_data(v2, 1, b),
+					 STARPU_TAG_ONLY, (starpu_tag_t) b,
 					 0);
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 	}
@@ -443,6 +444,7 @@ int gemv_kernel(starpu_data_handle_t v1,
 		ret = starpu_task_insert(&scal_kernel_cl,
 					 STARPU_RW, starpu_data_get_sub_data(v1, 1, b2),
 					 STARPU_VALUE, &p1, sizeof(p1),
+					 STARPU_TAG_ONLY, (starpu_tag_t) b2,
 					 0);
 		if (ret == -ENODEV) return ret;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
@@ -459,6 +461,7 @@ int gemv_kernel(starpu_data_handle_t v1,
 						 STARPU_R,	starpu_data_get_sub_data(v2, 1, b1),
 						 STARPU_VALUE,	&one,	sizeof(one),
 						 STARPU_VALUE,	&p2,	sizeof(p2),
+						 STARPU_TAG_ONLY, (starpu_tag_t) (b2 * nblocks + b1),
 						 0);
 			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 		}
@@ -538,6 +541,7 @@ int scal_axpy_kernel(starpu_data_handle_t v1, TYPE p1,
 					 STARPU_R,  starpu_data_get_sub_data(v2, 1, b),
 					 STARPU_VALUE, &p1, sizeof(p1),
 					 STARPU_VALUE, &p2, sizeof(p2),
+					 STARPU_TAG_ONLY, (starpu_tag_t) b,
 					 0);
 		if (ret == -ENODEV) return ret;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
@@ -611,6 +615,7 @@ int axpy_kernel(starpu_data_handle_t v1,
 					 STARPU_RW, starpu_data_get_sub_data(v1, 1, b),
 					 STARPU_R,  starpu_data_get_sub_data(v2, 1, b),
 					 STARPU_VALUE, &p1, sizeof(p1),
+					 STARPU_TAG_ONLY, (starpu_tag_t) b,
 					 0);
 		if (ret == -ENODEV) return ret;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

+ 3 - 0
examples/cholesky/cholesky_implicit.c

@@ -57,6 +57,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 					 STARPU_RW, sdatakk,
 					 STARPU_CALLBACK, (k == 3*nblocks/4)?callback_turn_spmd_on:NULL,
 					 STARPU_FLOPS, (double) FLOPS_SPOTRF(nn),
+					 STARPU_TAG_ONLY, TAG11(k),
 					 0);
 		if (ret == -ENODEV) return 77;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
@@ -70,6 +71,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 						 STARPU_R, sdatakk,
 						 STARPU_RW, sdatakj,
 						 STARPU_FLOPS, (double) FLOPS_STRSM(nn, nn),
+						 STARPU_TAG_ONLY, TAG21(k,j),
 						 0);
 			if (ret == -ENODEV) return 77;
 			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
@@ -87,6 +89,7 @@ static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks)
 								 STARPU_R, sdatakj,
 								 STARPU_RW | STARPU_COMMUTE, sdataij,
 								 STARPU_FLOPS, (double) FLOPS_SGEMM(nn, nn, nn),
+								 STARPU_TAG_ONLY, TAG22(k,i,j),
 								 0);
 					if (ret == -ENODEV) return 77;
 					STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");

+ 2 - 1
examples/cpp/incrementer_cpp.cpp

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2011, 2013  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010-2011, 2013-2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
  * Copyright (C) 2012 inria
  *
@@ -75,6 +75,7 @@ int main(int argc, char **argv)
 	{
 		ret = starpu_task_insert(&cl,
 					 STARPU_RW, float_array_handle,
+					 STARPU_TAG_ONLY, (starpu_tag_t) i,
 					 0);
                 if (STARPU_UNLIKELY(ret == -ENODEV))
                 {

+ 1 - 10
examples/lu/xlu.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010-2011, 2014  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
@@ -19,15 +19,6 @@
 #include "xlu.h"
 #include "xlu_kernels.h"
 
-#define TAG11(k)	((starpu_tag_t)( (1ULL<<60) | (unsigned long long)(k)))
-#define TAG12(k,i)	((starpu_tag_t)(((2ULL<<60) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(i))))
-#define TAG21(k,j)	((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(j))))
-#define TAG22(k,i,j)	((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) 	\
-					| ((unsigned long long)(i)<<16)	\
-					| (unsigned long long)(j))))
-
 static unsigned no_prio = 0;
 
 

+ 12 - 1
examples/lu/xlu.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010-2011, 2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -22,6 +22,17 @@
 #include <starpu.h>
 #include <common/blas.h>
 
+#define TAG11(k)	((starpu_tag_t)( (1ULL<<60) | (unsigned long long)(k)))
+#define TAG12(k,i)	((starpu_tag_t)(((2ULL<<60) | (((unsigned long long)(k))<<32)	\
+					| (unsigned long long)(i))))
+#define TAG21(k,j)	((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32)	\
+					| (unsigned long long)(j))))
+#define TAG22(k,i,j)	((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) 	\
+					| ((unsigned long long)(i)<<16)	\
+					| (unsigned long long)(j))))
+#define PIVOT(k,i)	((starpu_tag_t)(((5ULL<<60) | (((unsigned long long)(k))<<32)	\
+					| (unsigned long long)(i))))
+
 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)
 
 #define BLAS3_FLOP(n1,n2,n3)    \

+ 9 - 1
examples/lu/xlu_implicit.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2011, 2014  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
@@ -30,6 +30,8 @@ static int create_task_11(starpu_data_handle_t dataA, unsigned k)
 	/* which sub-data is manipulated ? */
 	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
 
+	task->tag_id = TAG11(k);
+
 	/* this is an important task */
 	if (!no_prio)
 		task->priority = STARPU_MAX_PRIO;
@@ -49,6 +51,8 @@ static int create_task_12(starpu_data_handle_t dataA, unsigned k, unsigned j)
 	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
 	task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k);
 
+	task->tag_id = TAG12(k,j);
+
 	if (!no_prio && (j == k+1))
 		task->priority = STARPU_MAX_PRIO;
 
@@ -68,6 +72,8 @@ static int create_task_21(starpu_data_handle_t dataA, unsigned k, unsigned i)
 	task->handles[0] = starpu_data_get_sub_data(dataA, 2, k, k);
 	task->handles[1] = starpu_data_get_sub_data(dataA, 2, k, i);
 
+	task->tag_id = TAG21(k,i);
+
 	if (!no_prio && (i == k+1))
 		task->priority = STARPU_MAX_PRIO;
 
@@ -88,6 +94,8 @@ static int create_task_22(starpu_data_handle_t dataA, unsigned k, unsigned i, un
 	task->handles[1] = starpu_data_get_sub_data(dataA, 2, j, k);
 	task->handles[2] = starpu_data_get_sub_data(dataA, 2, j, i);
 
+	task->tag_id = TAG22(k,i,j);
+
 	if (!no_prio &&  (i == k + 1) && (j == k +1) )
 		task->priority = STARPU_MAX_PRIO;
 

+ 15 - 1
examples/lu/xlu_implicit_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2012, 2014  Université de Bordeaux 1
  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
@@ -39,6 +39,8 @@ static int create_task_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 	/* which sub-data is manipulated ? */
 	task->handles[0] = get_block(dataAp, nblocks, k, i);
 
+	task->tag_id = PIVOT(k, i);
+
 	task->cl_arg = &piv_description[k];
 
 	/* this is an important task */
@@ -65,6 +67,8 @@ static int create_task_11_pivot(starpu_data_handle_t *dataAp, unsigned nblocks,
 	/* which sub-data is manipulated ? */
 	task->handles[0] = get_block(dataAp, nblocks, k, k);
 
+	task->tag_id = TAG11(k);
+
 	/* this is an important task */
 	if (!no_prio)
 		task->priority = STARPU_MAX_PRIO;
@@ -86,6 +90,8 @@ static int create_task_12(starpu_data_handle_t *dataAp, unsigned nblocks, unsign
 	task->handles[0] = get_block(dataAp, nblocks, k, k);
 	task->handles[1] = get_block(dataAp, nblocks, j, k);
 
+	task->tag_id = TAG12(k,j);
+
 	if (!no_prio && (j == k+1))
 		task->priority = STARPU_MAX_PRIO;
 
@@ -106,6 +112,8 @@ static int create_task_21(starpu_data_handle_t *dataAp, unsigned nblocks, unsign
 	task->handles[0] = get_block(dataAp, nblocks, k, k);
 	task->handles[1] = get_block(dataAp, nblocks, k, i);
 
+	task->tag_id = TAG21(k,i);
+
 	if (!no_prio && (i == k+1))
 		task->priority = STARPU_MAX_PRIO;
 
@@ -127,6 +135,8 @@ static int create_task_22(starpu_data_handle_t *dataAp, unsigned nblocks, unsign
 	task->handles[1] = get_block(dataAp, nblocks, j, k);
 	task->handles[2] = get_block(dataAp, nblocks, j, i);
 
+	task->tag_id = TAG22(k,i,j);
+
 	if (!no_prio &&  (i == k + 1) && (j == k +1) )
 		task->priority = STARPU_MAX_PRIO;
 
@@ -237,6 +247,8 @@ int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size,
 
 	double timing;
 	int ret = dw_codelet_facto_pivot(&dataA, piv_description, nblocks, get_block_with_striding, &timing);
+	if (ret)
+		return ret;
 
 	FPRINTF(stderr, "Computation took (in ms)\n");
 	FPRINTF(stderr, "%2.2f\n", timing/1000);
@@ -290,6 +302,8 @@ int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, uns
 
 	double timing;
 	int ret = dw_codelet_facto_pivot(dataAp, piv_description, nblocks, get_block_with_no_striding, &timing);
+	if (ret)
+		return ret;
 
 	FPRINTF(stderr, "Computation took (in ms)\n");
 	FPRINTF(stderr, "%2.2f\n", timing/1000);

+ 1 - 12
examples/lu/xlu_pivot.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2012, 2014  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -18,17 +18,6 @@
 #include "xlu.h"
 #include "xlu_kernels.h"
 
-#define TAG11(k)	((starpu_tag_t)( (1ULL<<60) | (unsigned long long)(k)))
-#define TAG12(k,i)	((starpu_tag_t)(((2ULL<<60) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(i))))
-#define TAG21(k,j)	((starpu_tag_t)(((3ULL<<60) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(j))))
-#define TAG22(k,i,j)	((starpu_tag_t)(((4ULL<<60) | ((unsigned long long)(k)<<32) 	\
-					| ((unsigned long long)(i)<<16)	\
-					| (unsigned long long)(j))))
-#define PIVOT(k,i)	((starpu_tag_t)(((5ULL<<60) | (((unsigned long long)(k))<<32)	\
-					| (unsigned long long)(i))))
-
 static unsigned no_prio = 0;
 
 /*

+ 1 - 0
examples/mandelbrot/mandelbrot.c

@@ -535,6 +535,7 @@ int main(int argc, char **argv)
 						 STARPU_VALUE, &stepY, sizeof(stepY),
 						 STARPU_W, block_handles[iby],
 						 STARPU_VALUE, &pcnt, sizeof(int *),
+						 STARPU_TAG_ONLY, (starpu_tag_t) (niter*nblocks + iby),
 						 0);
 			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
 		}

+ 5 - 1
examples/pipeline/pipeline.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2012, 2013, 2014  Centre National de la Recherche Scientifique
- * Copyright (C) 2012  Université de Bordeaux 1
+ * Copyright (C) 2012, 2014  Université de Bordeaux 1
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -205,6 +205,7 @@ int main(void)
 		ret = starpu_task_insert(&pipeline_codelet_x,
 				STARPU_W, buffersX[l%K],
 				STARPU_VALUE, &x, sizeof(x),
+				STARPU_TAG_ONLY, (starpu_tag_t) (100*l),
 				0);
 		if (ret == -ENODEV) goto enodev;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert x");
@@ -212,6 +213,7 @@ int main(void)
 		ret = starpu_task_insert(&pipeline_codelet_x,
 				STARPU_W, buffersY[l%K],
 				STARPU_VALUE, &y, sizeof(y),
+				STARPU_TAG_ONLY, (starpu_tag_t) (100*l+1),
 				0);
 		if (ret == -ENODEV) goto enodev;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert y");
@@ -219,6 +221,7 @@ int main(void)
 		ret = starpu_task_insert(&pipeline_codelet_axpy,
 				STARPU_R, buffersX[l%K],
 				STARPU_RW, buffersY[l%K],
+				STARPU_TAG_ONLY, (starpu_tag_t) l,
 				0);
 		if (ret == -ENODEV) goto enodev;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert axpy");
@@ -226,6 +229,7 @@ int main(void)
 		ret = starpu_task_insert(&pipeline_codelet_sum,
 				STARPU_R, buffersY[l%K],
 				STARPU_CALLBACK_WITH_ARG, (void (*)(void*))sem_post, &sems[l%C],
+				STARPU_TAG_ONLY, (starpu_tag_t) l,
 				0);
 		if (ret == -ENODEV) goto enodev;
 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert sum");

+ 2 - 1
include/starpu_task_util.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2013  Université de Bordeaux 1
+ * Copyright (C) 2010-2014  Université de Bordeaux 1
  * Copyright (C) 2010-2013  Centre National de la Recherche Scientifique
  * Copyright (C) 2014       INRIA
  *
@@ -49,6 +49,7 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 #define STARPU_PROLOGUE_CALLBACK_POP   (15<<18)
 #define STARPU_PROLOGUE_CALLBACK_POP_ARG (16<<18)
 #define STARPU_EXECUTE_ON_WORKER (17<<18)
+#define STARPU_TAG_ONLY          (18<<18)
 
 struct starpu_task *starpu_task_build(struct starpu_codelet *cl, ...);
 int starpu_task_insert(struct starpu_codelet *cl, ...);

+ 12 - 0
mpi/src/starpu_mpi_task_insert.c

@@ -309,6 +309,10 @@ int _starpu_mpi_task_decode_v(struct starpu_codelet *codelet, int me, int nb_nod
 		{
 			(void)va_arg(varg_list_copy, double);
 		}
+		else if (arg_type==STARPU_TAG_ONLY)
+		{
+			(void)va_arg(varg_list_copy, starpu_tag_t);
+		}
 		else if (arg_type==STARPU_TAG)
 		{
 			STARPU_ASSERT_MSG(0, "STARPU_TAG is not supported in MPI mode\n");
@@ -471,6 +475,10 @@ int _starpu_mpi_task_build_v(MPI_Comm comm, struct starpu_codelet *codelet, stru
 		{
 			(void)va_arg(varg_list_copy, double);
 		}
+		else if (arg_type==STARPU_TAG_ONLY)
+		{
+			(void)va_arg(varg_list_copy, starpu_tag_t);
+		}
 		else if (arg_type==STARPU_TAG)
 		{
 			STARPU_ASSERT_MSG(0, "STARPU_TAG is not supported in MPI mode\n");
@@ -609,6 +617,10 @@ int _starpu_mpi_task_postbuild_v(MPI_Comm comm, struct starpu_codelet *codelet,
 		{
 			(void)va_arg(varg_list_copy, double);
 		}
+		else if (arg_type==STARPU_TAG_ONLY)
+		{
+			(void)va_arg(varg_list_copy, starpu_tag_t);
+		}
 		else if (arg_type==STARPU_TAG)
 		{
 			STARPU_ASSERT_MSG(0, "STARPU_TAG is not supported in MPI mode\n");

+ 7 - 3
src/util/starpu_task_insert_utils.c

@@ -126,8 +126,7 @@ size_t _starpu_task_insert_get_arg_size(va_list varg_list)
 		{
 			(void)va_arg(varg_list, double);
 		}
-
-		else if (arg_type==STARPU_TAG)
+		else if (arg_type==STARPU_TAG || arg_type==STARPU_TAG_ONLY)
 		{
 			(void)va_arg(varg_list, starpu_tag_t);
 		}
@@ -237,7 +236,7 @@ int _starpu_codelet_pack_args(void **arg_buffer, size_t arg_buffer_size, va_list
 		{
 			(void)va_arg(varg_list, double);
 		}
-		else if (arg_type==STARPU_TAG)
+		else if (arg_type==STARPU_TAG || arg_type==STARPU_TAG_ONLY)
 		{
 			(void)va_arg(varg_list, starpu_tag_t);
 		}
@@ -416,6 +415,11 @@ void _starpu_task_insert_create(void *arg_buffer, size_t arg_buffer_size, struct
 			(*task)->tag_id = tag;
 			(*task)->use_tag = 1;
 		}
+		else if (arg_type==STARPU_TAG_ONLY)
+		{
+			starpu_tag_t tag = va_arg(varg_list, starpu_tag_t);
+			(*task)->tag_id = tag;
+		}
 		else
 		{
 			STARPU_ABORT_MSG("Unrecognized argument %d\n", arg_type);