4 anos atrás · 5c2530682a
--- a/include/starpu_driver.h
+++ b/include/starpu_driver.h
@@ -64,7 +64,6 @@ struct starpu_driver
 
				 #else
			
 
				 		unsigned opencl_id;
			
 
				 #endif
			
 
				-		unsigned fpga_id;
			
 
				 	} id;
			
 
				 };
			
 
				 
			
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -35,8 +35,6 @@ extern "C"
 
				 struct starpu_task;
			
 
				 struct starpu_data_descr;
			
 
				 
			
 
				-#define STARPU_NARCH STARPU_ANY_WORKER
			
 
				-
			
 
				 /**
			
 
				    todo
			
 
				 */
			
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -349,7 +349,7 @@ struct starpu_codelet
 
				 	*/
			
 
				 	starpu_cuda_func_t cuda_func STARPU_DEPRECATED;
			
 
				 
			
 
				-        /**
			
 
				+	/**
			
 
				 	   @deprecated
			
 
				 	   Optional field which has been made deprecated. One should
			
 
				 	   use instead the starpu_codelet::opencl_funcs field.
			
--- a/include/starpu_worker.h
+++ b/include/starpu_worker.h
@@ -69,7 +69,7 @@ enum starpu_worker_archtype
 
				 	STARPU_FPGA_WORKER=4,       /**< FPGA device */
			
 
				 	STARPU_MIC_WORKER=3,        /**< Intel MIC device */
			
 
				 	STARPU_MPI_MS_WORKER=5,     /**< MPI Slave device */
			
 
				-	STARPU_MAX_WORKER=5,        /**< maximum value of STARPU_*_WORKER */
			
 
				+	STARPU_NARCH = 6,           /**< Number of arch types */
			
 
				 	STARPU_ANY_WORKER=255       /**< any worker, used in the hypervisor */
			
 
				 };
			
 
				 
			
@@ -389,6 +389,12 @@ enum starpu_node_kind starpu_node_get_kind(unsigned node);
 
				 enum starpu_worker_archtype starpu_memory_node_get_worker_archtype(enum starpu_node_kind node_kind);
			
 
				 
			
 
				 /**
			
 
				+   Return the type of memory node that arch type \p type operates on
			
 
				+  */
			
 
				+enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type);
			
 
				+
			
 
				+
			
 
				+/**
			
 
				    @name Scheduling operations
			
 
				    @{
			
 
				 */
			
--- a/src/common/fxt.h
+++ b/src/common/fxt.h
@@ -37,13 +37,8 @@
 
				 #include <starpu.h>
			
 
				 
			
 
				 /* some key to identify the worker kind */
			
 
				-#define _STARPU_FUT_APPS_KEY	0x100
			
 
				-#define _STARPU_FUT_CPU_KEY	0x101
			
 
				-#define _STARPU_FUT_CUDA_KEY	0x102
			
 
				-#define _STARPU_FUT_OPENCL_KEY	0x103
			
 
				-#define _STARPU_FUT_FPGA_KEY	0x109
			
 
				-#define _STARPU_FUT_MIC_KEY	0x104
			
 
				-#define _STARPU_FUT_MPI_KEY	0x106
			
 
				+#define _STARPU_FUT_WORKER_KEY(kind) ((kind) + 0x100) /* arch type -> FxT key; inverse of _STARPU_FUT_KEY_WORKER */
			
 
				+#define _STARPU_FUT_KEY_WORKER(key) ((key) - 0x100) /* FxT key -> arch type */
			
 
				 
			
 
				 #define _STARPU_FUT_WORKER_INIT_START	0x5100
			
 
				 #define _STARPU_FUT_WORKER_INIT_END	0x5101
			
@@ -738,10 +733,9 @@ do {									\
 
				 		FUT_DO_ALWAYS_PROBE2(FUT_NEW_LWP_CODE, cpuid, _starpu_gettid()); \
			
 
				 } while (0)
			
 
				 
			
 
				-/* workerkind = _STARPU_FUT_CPU_KEY for instance */
			
 
				 #define _STARPU_TRACE_WORKER_INIT_START(workerkind, workerid, devid, memnode, bindid, sync)	do {\
			
 
				 	if (_starpu_fxt_started) \
			
 
				-		FUT_DO_ALWAYS_PROBE7(_STARPU_FUT_WORKER_INIT_START, workerkind, workerid, devid, memnode, bindid, sync, _starpu_gettid()); \
			
 
				+		FUT_DO_ALWAYS_PROBE7(_STARPU_FUT_WORKER_INIT_START, _STARPU_FUT_WORKER_KEY(workerkind), workerid, devid, memnode, bindid, sync, _starpu_gettid()); \
			
 
				 } while (0)
			
 
				 
			
 
				 #define _STARPU_TRACE_WORKER_INIT_END(__workerid)		do {\
			
@@ -931,7 +925,7 @@ do {										\
 
				 
			
 
				 #define _STARPU_TRACE_WORKER_DEINIT_END(workerkind)		do {\
			
 
				 	if (_starpu_fxt_started) \
			
 
				-		FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, workerkind, _starpu_gettid()); \
			
 
				+		FUT_DO_ALWAYS_PROBE2(_STARPU_FUT_WORKER_DEINIT_END, _STARPU_FUT_WORKER_KEY(workerkind), _starpu_gettid()); \
			
 
				 } while(0)
			
 
				 
			
 
				 #define _STARPU_TRACE_WORKER_SCHEDULING_START	\
			
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -295,7 +295,7 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 
				 		if (!_starpu_data_is_multiformat_handle(handle))
			
 
				 			continue;
			
 
				 
			
 
				-		node_kind = _starpu_worker_get_node_kind(arch->devices[0].type);
			
 
				+		node_kind = starpu_worker_get_memory_node_kind(arch->devices[0].type);
			
 
				 		if (!_starpu_handle_needs_conversion_task_for_arch(handle, node_kind))
			
 
				 			continue;
			
 
				 
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -53,7 +53,7 @@ static int current_arch_comb;
 
				 static int nb_arch_combs;
			
 
				 static starpu_pthread_rwlock_t arch_combs_mutex;
			
 
				 static int historymaxerror;
			
 
				-static char ignore_devid[STARPU_ANY_WORKER];
			
 
				+static char ignore_devid[STARPU_NARCH];
			
 
				 
			
 
				 /* How many executions a codelet will have to be measured before we
			
 
				  * consider that calibration will provide a value good enough for scheduling */
			
@@ -1221,6 +1221,7 @@ void _starpu_initialize_registered_performance_models(void)
 
				 	unsigned ncuda =  conf->topology.nhwcudagpus;
			
 
				 	unsigned nopencl = conf->topology.nhwopenclgpus;
			
 
				 	unsigned nmic = 0;
			
 
				+	enum starpu_worker_archtype archtype;
			
 
				 #if STARPU_MAXMICDEVS > 0 || STARPU_MAXMPIDEVS > 0
			
 
				 	unsigned i;
			
 
				 #endif
			
@@ -1241,11 +1242,14 @@ void _starpu_initialize_registered_performance_models(void)
 
				 	current_arch_comb = 0;
			
 
				 	historymaxerror = starpu_get_env_number_default("STARPU_HISTORY_MAX_ERROR", STARPU_HISTORYMAXERROR);
			
 
				 	_starpu_calibration_minimum = starpu_get_env_number_default("STARPU_CALIBRATE_MINIMUM", 10);
			
 
				-	ignore_devid[STARPU_CPU_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CPU", 1);
			
 
				-	ignore_devid[STARPU_CUDA_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_CUDA", 0);
			
 
				-	ignore_devid[STARPU_OPENCL_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_OPENCL", 0);
			
 
				-	ignore_devid[STARPU_MIC_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_MIC", 0);
			
 
				-	ignore_devid[STARPU_MPI_MS_WORKER] = starpu_get_env_number_default("STARPU_PERF_MODEL_HOMOGENEOUS_MPI_MS", 0);
			
 
				+
			
 
				+	for (archtype = 0; archtype < STARPU_NARCH; archtype++) {
			
 
				+		char name[128];
			
 
				+		const char *arch = starpu_worker_get_type_as_env_var(archtype);
			
 
				+		int def = archtype == STARPU_CPU_WORKER ? 1 : 0;
			
 
				+		snprintf(name, sizeof(name), "STARPU_PERF_MODEL_HOMOGENEOUS_%s", arch);
			
 
				+		ignore_devid[archtype] = starpu_get_env_number_default(name, def); /* per-arch variable built just above */
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 void _starpu_deinitialize_performance_model(struct starpu_perfmodel *model)
			
--- a/src/core/sched_policy.c
+++ b/src/core/sched_policy.c
@@ -629,18 +629,15 @@ int _starpu_push_task_to_workers(struct starpu_task *task)
 
				 				&& starpu_get_prefetch_flag()
			
 
				 				&& starpu_memory_nodes_get_count() > 1)
			
 
				 			{
			
 
				-				if (task->where == STARPU_CPU && config->cpus_nodeid >= 0)
			
 
				-					starpu_prefetch_task_input_on_node(task, config->cpus_nodeid);
			
 
				-				else if (task->where == STARPU_CUDA && config->cuda_nodeid >= 0)
			
 
				-					starpu_prefetch_task_input_on_node(task, config->cuda_nodeid);
			
 
				-				else if (task->where == STARPU_OPENCL && config->opencl_nodeid >= 0)
			
 
				-					starpu_prefetch_task_input_on_node(task, config->opencl_nodeid);
			
 
				-				else if (task->cl->where == STARPU_FPGA && config->fpga_nodeid >= 0)
			
 
				-					starpu_prefetch_task_input_on_node(task, config->fpga_nodeid);
			
 
				-				else if (task->where == STARPU_MIC && config->mic_nodeid >= 0)
			
 
				-					starpu_prefetch_task_input_on_node(task, config->mic_nodeid);
			
 
				-				else if (task->where == STARPU_MPI_MS && config->mpi_nodeid >= 0)
			
 
				-					starpu_prefetch_task_input_on_node(task, config->mpi_nodeid);
			
 
				+				enum starpu_worker_archtype type;
			
 
				+				for (type = 0; type < STARPU_NARCH; type++)
			
 
				+				{
			
 
				+					if (task->where == (int32_t) STARPU_WORKER_TO_MASK(type)) {
			
 
				+						if (config->arch_nodeid[type] >= 0)
			
 
				+							starpu_prefetch_task_input_on_node(task, config->arch_nodeid[type]);
			
 
				+						break;
			
 
				+					}
			
 
				+				}
			
 
				 			}
			
 
				 
			
 
				 			STARPU_ASSERT(sched_ctx->sched_policy->push_task);
			
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -1484,38 +1484,29 @@ _starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
 
				 	switch (node_kind)
			
 
				 	{
			
 
				 		case STARPU_CPU_RAM:
			
 
				+		case STARPU_MIC_RAM:
			
 
				+		case STARPU_MPI_MS_RAM:
			
 
				 			switch(starpu_node_get_kind(handle->mf_node))
			
 
				 			{
			
 
				 				case STARPU_CPU_RAM:
			
 
				-					return 0;
			
 
				-				case STARPU_CUDA_RAM:      /* Fall through */
			
 
				-				case STARPU_OPENCL_RAM:
			
 
				 				case STARPU_MIC_RAM:
			
 
				                                 case STARPU_MPI_MS_RAM:
			
 
				-					return 1;
			
 
				+					return 0;
			
 
				 				default:
			
 
				-					STARPU_ABORT();
			
 
				+					return 1;
			
 
				 			}
			
 
				 			break;
			
 
				-		case STARPU_CUDA_RAM:    /* Fall through */
			
 
				-		case STARPU_OPENCL_RAM:
			
 
				-		case STARPU_MIC_RAM:
			
 
				-		case STARPU_MPI_MS_RAM:
			
 
				+		default:
			
 
				 			switch(starpu_node_get_kind(handle->mf_node))
			
 
				 			{
			
 
				 				case STARPU_CPU_RAM:
			
 
				-					return 1;
			
 
				-				case STARPU_CUDA_RAM:
			
 
				-				case STARPU_OPENCL_RAM:
			
 
				 				case STARPU_MIC_RAM:
			
 
				                                 case STARPU_MPI_MS_RAM:
			
 
				-					return 0;
			
 
				+					return 1;
			
 
				 				default:
			
 
				-					STARPU_ABORT();
			
 
				+					return 0;
			
 
				 			}
			
 
				 			break;
			
 
				-		default:
			
 
				-			STARPU_ABORT();
			
 
				 	}
			
 
				 	/* that instruction should never be reached */
			
 
				 	return -EINVAL;
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -3129,6 +3129,7 @@ int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_conf
 
				 {
			
 
				 	int ret;
			
 
				 	unsigned i;
			
 
				+	enum starpu_worker_archtype type;
			
 
				 
			
 
				 	ret = _starpu_init_machine_config(config, no_mp_config);
			
 
				 	if (ret)
			
@@ -3142,57 +3143,16 @@ int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_conf
 
				 
			
 
				 	_starpu_mem_chunk_init_last();
			
 
				 
			
 
				-	config->cpus_nodeid = -1;
			
 
				-	config->cuda_nodeid = -1;
			
 
				-	config->opencl_nodeid = -1;
			
 
				-	config->fpga_nodeid = -1;
			
 
				-	config->mic_nodeid = -1;
			
 
				-	config->mpi_nodeid = -1;
			
 
				+	for (type = 0; type < STARPU_NARCH; type++)
			
 
				+		config->arch_nodeid[type] = -1;
			
 
				+
			
 
				 	for (i = 0; i < starpu_worker_get_count(); i++)
			
 
				 	{
			
 
				-		switch (starpu_worker_get_type(i))
			
 
				-		{
			
 
				-			case STARPU_CPU_WORKER:
			
 
				-				if (config->cpus_nodeid == -1)
			
 
				-					config->cpus_nodeid = starpu_worker_get_memory_node(i);
			
 
				-				else if (config->cpus_nodeid != (int) starpu_worker_get_memory_node(i))
			
 
				-					config->cpus_nodeid = -2;
			
 
				-				break;
			
 
				-
			
 
				-			case STARPU_CUDA_WORKER:
			
 
				-				if (config->cuda_nodeid == -1)
			
 
				-					config->cuda_nodeid = starpu_worker_get_memory_node(i);
			
 
				-				else if (config->cuda_nodeid != (int) starpu_worker_get_memory_node(i))
			
 
				-					config->cuda_nodeid = -2;
			
 
				-				break;
			
 
				-
			
 
				-			case STARPU_OPENCL_WORKER:
			
 
				-				if (config->opencl_nodeid == -1)
			
 
				-					config->opencl_nodeid = starpu_worker_get_memory_node(i);
			
 
				-				else if (config->opencl_nodeid != (int) starpu_worker_get_memory_node(i))
			
 
				-					config->opencl_nodeid = -2;
			
 
				-				break;
			
 
				-                        case STARPU_FPGA_WORKER:
			
 
				-				if (config->fpga_nodeid == -1)
			
 
				-					config->fpga_nodeid = starpu_worker_get_memory_node(i);
			
 
				-				else if (config->fpga_nodeid != (int) starpu_worker_get_memory_node(i))
			
 
				-					config->fpga_nodeid = -2;
			
 
				-				break;
			
 
				-			case STARPU_MIC_WORKER:
			
 
				-				if (config->mic_nodeid == -1)
			
 
				-					config->mic_nodeid = starpu_worker_get_memory_node(i);
			
 
				-				else if (config->mic_nodeid != (int) starpu_worker_get_memory_node(i))
			
 
				-					config->mic_nodeid = -2;
			
 
				-				break;
			
 
				-			case STARPU_MPI_MS_WORKER:
			
 
				-				if (config->mpi_nodeid == -1)
			
 
				-					config->mpi_nodeid = starpu_worker_get_memory_node(i);
			
 
				-				else if (config->mpi_nodeid != (int) starpu_worker_get_memory_node(i))
			
 
				-					config->mpi_nodeid = -2;
			
 
				-				break;
			
 
				-			case STARPU_ANY_WORKER:
			
 
				-				STARPU_ASSERT(0);
			
 
				-		}
			
 
				+		type = starpu_worker_get_type(i);
			
 
				+		if (config->arch_nodeid[type] == -1)
			
 
				+			config->arch_nodeid[type] = starpu_worker_get_memory_node(i);
			
 
				+		else if (config->arch_nodeid[type] != (int) starpu_worker_get_memory_node(i))
			
 
				+			config->arch_nodeid[type] = -2;
			
 
				 	}
			
 
				 
			
 
				 	return 0;
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -207,7 +207,7 @@ void _starpu__workers_c__register_kobs(void)
 
				 	/* TODO */
			
 
				 }
			
 
				 
			
 
				-struct starpu_driver_info starpu_driver_info[STARPU_MAX_WORKER+1];
			
 
				+struct starpu_driver_info starpu_driver_info[STARPU_NARCH];
			
 
				 
			
 
				 void starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct starpu_driver_info *info)
			
 
				 {
			
@@ -777,23 +777,23 @@ static void _starpu_worker_deinit(struct _starpu_worker *workerarg)
 
				 }
			
 
				 
			
 
				 #ifdef STARPU_USE_FXT
			
 
				-void _starpu_worker_start(struct _starpu_worker *worker, unsigned fut_key, unsigned sync)
			
 
				+void _starpu_worker_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync)
			
 
				 {
			
 
				 	unsigned devid = worker->devid;
			
 
				 	unsigned memnode = worker->memory_node;
			
 
				-	_STARPU_TRACE_WORKER_INIT_START(fut_key, worker->workerid, devid, memnode, worker->bindid, sync);
			
 
				+	_STARPU_TRACE_WORKER_INIT_START(archtype, worker->workerid, devid, memnode, worker->bindid, sync);
			
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-void _starpu_driver_start(struct _starpu_worker *worker, unsigned fut_key, unsigned sync STARPU_ATTRIBUTE_UNUSED)
			
 
				+void _starpu_driver_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync STARPU_ATTRIBUTE_UNUSED)
			
 
				 {
			
 
				-	(void) fut_key;
			
 
				+	(void) archtype;
			
 
				 	int devid = worker->devid;
			
 
				 	(void) devid;
			
 
				 
			
 
				 #ifdef STARPU_USE_FXT
			
 
				 	_STARPU_TRACE_REGISTER_THREAD(worker->bindid);
			
 
				-	_starpu_worker_start(worker, fut_key, sync);
			
 
				+	_starpu_worker_start(worker, archtype, sync);
			
 
				 #endif
			
 
				 	_starpu_set_local_worker_key(worker);
			
 
				 
			
@@ -933,7 +933,6 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
				 
			
 
				 #if defined(STARPU_USE_FPGA)
			
 
				 			case STARPU_FPGA_WORKER:
			
 
				-				driver.id.fpga_id = workerarg->devid;
			
 
				 				if (!_starpu_may_launch_driver(&pconfig->conf, &driver))
			
 
				 				{
			
 
				 					workerarg->run_by_starpu = 0;
			
@@ -2864,7 +2863,7 @@ void starpu_worker_set_waking_up_callback(void (*callback)(unsigned workerid))
 
				 }
			
 
				 #endif
			
 
				 
			
 
				-enum starpu_node_kind _starpu_worker_get_node_kind(enum starpu_worker_archtype type)
			
 
				+enum starpu_node_kind starpu_worker_get_memory_node_kind(enum starpu_worker_archtype type)
			
 
				 {
			
 
				 	enum starpu_node_kind kind = starpu_driver_info[type].memory_kind;
			
 
				 	STARPU_ASSERT_MSG(kind != (enum starpu_node_kind) -1, "no memory for archtype %d", type);
			
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -403,18 +403,8 @@ struct _starpu_machine_config
 
				 	/** Which MPI do we use? */
			
 
				 	int current_mpi_deviceid;
			
 
				 
			
 
				-	/** Memory node for cpus, if only one */
			
 
				-	int cpus_nodeid;
			
 
				-	/** Memory node for CUDA, if only one */
			
 
				-	int cuda_nodeid;
			
 
				-	/** Memory node for OpenCL, if only one */
			
 
				-	int opencl_nodeid;
			
 
				-        /* Memory node for FPGA, if only one */
			
 
				-	int fpga_nodeid;
			
 
				-	/** Memory node for MIC, if only one */
			
 
				-	int mic_nodeid;
			
 
				-	/** Memory node for MPI, if only one */
			
 
				-	int mpi_nodeid;
			
 
				+	/** Memory node for different worker types, if only one */
			
 
				+	int arch_nodeid [STARPU_NARCH];
			
 
				 
			
 
				 	/** Separate out previous variables from per-worker data. */
			
 
				 	char padding1[STARPU_CACHELINE_SIZE];
			
@@ -479,7 +469,7 @@ struct starpu_driver_info {
 
				 };
			
 
				 
			
 
				 /** Device driver information, indexed by enum starpu_worker_archtype */
			
 
				-extern struct starpu_driver_info starpu_driver_info[STARPU_MAX_WORKER+1];
			
 
				+extern struct starpu_driver_info starpu_driver_info[STARPU_NARCH];
			
 
				 
			
 
				 void starpu_driver_info_register(enum starpu_worker_archtype archtype, const struct starpu_driver_info *info);
			
 
				 
			
@@ -551,9 +541,9 @@ unsigned _starpu_worker_can_block(unsigned memnode, struct _starpu_worker *worke
 
				 void _starpu_block_worker(int workerid, starpu_pthread_cond_t *cond, starpu_pthread_mutex_t *mutex);
			
 
				 
			
 
				 /** This function initializes the current driver for the given worker */
			
 
				-void _starpu_driver_start(struct _starpu_worker *worker, unsigned fut_key, unsigned sync);
			
 
				+void _starpu_driver_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync);
			
 
				 /** This function initializes the current thread for the given worker */
			
 
				-void _starpu_worker_start(struct _starpu_worker *worker, unsigned fut_key, unsigned sync);
			
 
				+void _starpu_worker_start(struct _starpu_worker *worker, enum starpu_worker_archtype archtype, unsigned sync);
			
 
				 
			
 
				 static inline unsigned _starpu_worker_get_count(void)
			
 
				 {
			
@@ -702,8 +692,6 @@ static inline unsigned __starpu_worker_get_id_check(const char *f, int l)
 
				 }
			
 
				 #define _starpu_worker_get_id_check(f,l) __starpu_worker_get_id_check(f,l)
			
 
				 
			
 
				-enum starpu_node_kind _starpu_worker_get_node_kind(enum starpu_worker_archtype type);
			
 
				-
			
 
				 void _starpu_worker_set_stream_ctx(unsigned workerid, struct _starpu_sched_ctx *sched_ctx);
			
 
				 
			
 
				 struct _starpu_sched_ctx* _starpu_worker_get_ctx_stream(unsigned stream_workerid);
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -139,27 +139,24 @@ int _starpu_select_src_node(starpu_data_handle_t handle, unsigned destination)
 
				 			 * 	Unless peer transfer is supported (and it would then have been selected above).
			
 
				 			 * 	Other should be ok */
			
 
				 
			
 
				-			if (starpu_node_get_kind(i) == STARPU_CUDA_RAM ||
			
 
				-			    starpu_node_get_kind(i) == STARPU_OPENCL_RAM ||
			
 
				-			    starpu_node_get_kind(i) == STARPU_FPGA_RAM ||
			
 
				-			    starpu_node_get_kind(i) == STARPU_MIC_RAM)
			
 
				-				i_gpu = i;
			
 
				-
			
 
				 			if (starpu_node_get_kind(i) == STARPU_CPU_RAM ||
			
 
				 			    starpu_node_get_kind(i) == STARPU_MPI_MS_RAM)
			
 
				 				i_ram = i;
			
 
				-			if (starpu_node_get_kind(i) == STARPU_DISK_RAM)
			
 
				+			else if (starpu_node_get_kind(i) == STARPU_DISK_RAM)
			
 
				 				i_disk = i;
			
 
				+			else
			
 
				+				i_gpu = i;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 	/* we have to use cpu_ram in first */
			
 
				 	if (i_ram != -1)
			
 
				 		src_node = i_ram;
			
 
				-	/* no luck we have to use the disk memory */
			
 
				 	else if (i_gpu != -1)
			
 
				+	/* otherwise a gpu */
			
 
				 		src_node = i_gpu;
			
 
				 	else
			
 
				+	/* no luck we have to use the disk memory */
			
 
				 		src_node = i_disk;
			
 
				 
			
 
				 	STARPU_ASSERT(src_node != -1);
			
--- a/src/debug/traces/starpu_fxt.c
+++ b/src/debug/traces/starpu_fxt.c
@@ -39,30 +39,18 @@
 
				 #include <starpu_hash.h>
			
 
				 
			
 
				 #define CPUS_WORKER_COLORS_NB	8
			
 
				-#define CUDA_WORKER_COLORS_NB	9
			
 
				-#define OPENCL_WORKER_COLORS_NB 9
			
 
				-#define MIC_WORKER_COLORS_NB	9
			
 
				-#define MPI_MS_WORKER_COLORS_NB	9
			
 
				-#define OTHER_WORKER_COLORS_NB	4
			
 
				+#define ACCEL_WORKER_COLORS_NB	9
			
 
				 
			
 
				 /* How many times longer an idle period has to be before the smoothing
			
 
				  * heuristics avoids averaging codelet gflops */
			
 
				 #define IDLE_FACTOR 2
			
 
				 
			
 
				 static char *cpus_worker_colors[CPUS_WORKER_COLORS_NB] = {"/greens9/7", "/greens9/6", "/greens9/5", "/greens9/4",  "/greens9/9", "/greens9/3",  "/greens9/2",  "/greens9/1"  };
			
 
				-static char *cuda_worker_colors[CUDA_WORKER_COLORS_NB] = {"/ylorrd9/9", "/ylorrd9/6", "/ylorrd9/3", "/ylorrd9/1", "/ylorrd9/8", "/ylorrd9/7", "/ylorrd9/4", "/ylorrd9/2",  "/ylorrd9/1"};
			
 
				-static char *opencl_worker_colors[OPENCL_WORKER_COLORS_NB] = {"/blues9/9", "/blues9/6", "/blues9/3", "/blues9/1", "/blues9/8", "/blues9/7", "/blues9/4", "/blues9/2",  "/blues9/1"};
			
 
				-static char *mic_worker_colors[MIC_WORKER_COLORS_NB] = {"/reds9/9", "/reds9/6", "/reds9/3", "/reds9/1", "/reds9/8", "/reds9/7", "/reds9/4", "/reds9/2",  "/reds9/1"};
			
 
				-static char *mpi_ms_worker_colors[MPI_MS_WORKER_COLORS_NB] = {"/reds9/9", "/reds9/6", "/reds9/3", "/reds9/1", "/reds9/8", "/reds9/7", "/reds9/4", "/reds9/2",  "/reds9/1"};
			
 
				-static char *other_worker_colors[OTHER_WORKER_COLORS_NB] = {"/greys9/9", "/greys9/8", "/greys9/7", "/greys9/6"};
			
 
				+static char *accel_worker_colors[ACCEL_WORKER_COLORS_NB] = {"/ylorrd9/9", "/ylorrd9/6", "/ylorrd9/3", "/ylorrd9/1", "/ylorrd9/8", "/ylorrd9/7", "/ylorrd9/4", "/ylorrd9/2",  "/ylorrd9/1"};
			
 
				 static char *worker_colors[STARPU_NMAXWORKERS];
			
 
				 
			
 
				-static unsigned opencl_index = 0;
			
 
				-static unsigned cuda_index = 0;
			
 
				 static unsigned cpus_index = 0;
			
 
				-static unsigned mic_index = 0;
			
 
				-static unsigned mpi_ms_index = 0;
			
 
				-static unsigned other_index = 0;
			
 
				+static unsigned accel_index = 0;
			
 
				 static uint64_t* number_events = NULL;
			
 
				 
			
 
				 static unsigned long fut_keymask;
			
@@ -395,14 +383,6 @@ out:
 
				 	free(data);
			
 
				 }
			
 
				 
			
 
				-static void set_next_other_worker_color(int workerid)
			
 
				-{
			
 
				-	if (workerid >= STARPU_NMAXWORKERS)
			
 
				-		return;
			
 
				-	worker_colors[workerid] = other_worker_colors[other_index++];
			
 
				-	if (other_index == OTHER_WORKER_COLORS_NB) other_index = 0;
			
 
				-}
			
 
				-
			
 
				 static void set_next_cpu_worker_color(int workerid)
			
 
				 {
			
 
				 	if (workerid >= STARPU_NMAXWORKERS)
			
@@ -411,36 +391,12 @@ static void set_next_cpu_worker_color(int workerid)
 
				 	if (cpus_index == CPUS_WORKER_COLORS_NB) cpus_index = 0;
			
 
				 }
			
 
				 
			
 
				-static void set_next_cuda_worker_color(int workerid)
			
 
				-{
			
 
				-	if (workerid >= STARPU_NMAXWORKERS)
			
 
				-		return;
			
 
				-	worker_colors[workerid] = cuda_worker_colors[cuda_index++];
			
 
				-	if (cuda_index == CUDA_WORKER_COLORS_NB) cuda_index = 0;
			
 
				-}
			
 
				-
			
 
				-static void set_next_opencl_worker_color(int workerid)
			
 
				-{
			
 
				-	if (workerid >= STARPU_NMAXWORKERS)
			
 
				-		return;
			
 
				-	worker_colors[workerid] = opencl_worker_colors[opencl_index++];
			
 
				-	if (opencl_index == OPENCL_WORKER_COLORS_NB) opencl_index = 0;
			
 
				-}
			
 
				-
			
 
				-static void set_next_mic_worker_color(int workerid)
			
 
				-{
			
 
				-	if (workerid >= STARPU_NMAXWORKERS)
			
 
				-		return;
			
 
				-	worker_colors[workerid] = mic_worker_colors[mic_index++];
			
 
				-	if (mic_index == MIC_WORKER_COLORS_NB) mic_index = 0;
			
 
				-}
			
 
				-
			
 
				-static void set_next_mpi_ms_worker_color(int workerid)
			
 
				+static void set_next_accel_worker_color(int workerid)
			
 
				 {
			
 
				 	if (workerid >= STARPU_NMAXWORKERS)
			
 
				 		return;
			
 
				-	worker_colors[workerid] = mpi_ms_worker_colors[mpi_ms_index++];
			
 
				-	if (mpi_ms_index == MPI_MS_WORKER_COLORS_NB) mpi_ms_index = 0;
			
 
				+	worker_colors[workerid] = accel_worker_colors[accel_index++];
			
 
				+	if (accel_index == ACCEL_WORKER_COLORS_NB) accel_index = 0;
			
 
				 }
			
 
				 
			
 
				 static const char *get_worker_color(int workerid)
			
@@ -1231,56 +1187,24 @@ static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 
				 
			
 
				 	new_thread = register_worker_id(prefixTOnodeid(prefix), threadid, workerid, set);
			
 
				 
			
 
				-	char *kindstr = "";
			
 
				+	const char *kindstr;
			
 
				 	struct starpu_perfmodel_arch arch;
			
 
				 	arch.ndevices = 1;
			
 
				 	_STARPU_MALLOC(arch.devices, sizeof(struct starpu_perfmodel_device));
			
 
				 
			
 
				-	switch (ev->param[0])
			
 
				-	{
			
 
				-		case _STARPU_FUT_APPS_KEY:
			
 
				-			set_next_other_worker_color(workerid);
			
 
				-			kindstr = "APPS";
			
 
				-			break;
			
 
				-		case _STARPU_FUT_CPU_KEY:
			
 
				-			set_next_cpu_worker_color(workerid);
			
 
				-			kindstr = "CPU";
			
 
				-			arch.devices[0].type = STARPU_CPU_WORKER;
			
 
				-			arch.devices[0].devid = 0;
			
 
				-			arch.devices[0].ncores = 1;
			
 
				-			break;
			
 
				-		case _STARPU_FUT_CUDA_KEY:
			
 
				-			set_next_cuda_worker_color(workerid);
			
 
				-			kindstr = "CUDA";
			
 
				-			arch.devices[0].type = STARPU_CUDA_WORKER;
			
 
				-			arch.devices[0].devid = devid;
			
 
				-			arch.devices[0].ncores = 1;
			
 
				-			break;
			
 
				-		case _STARPU_FUT_OPENCL_KEY:
			
 
				-			set_next_opencl_worker_color(workerid);
			
 
				-			kindstr = "OPENCL";
			
 
				-			arch.devices[0].type = STARPU_OPENCL_WORKER;
			
 
				-			arch.devices[0].devid = devid;
			
 
				-			arch.devices[0].ncores = 1;
			
 
				-			break;
			
 
				-		case _STARPU_FUT_MIC_KEY:
			
 
				-			set_next_mic_worker_color(workerid);
			
 
				-			kindstr = "mic";
			
 
				-			arch.devices[0].type = STARPU_MIC_WORKER;
			
 
				-			arch.devices[0].devid = devid;
			
 
				-			arch.devices[0].ncores = 1;
			
 
				-			break;
			
 
				-		case _STARPU_FUT_MPI_KEY:
			
 
				-			set_next_mpi_ms_worker_color(workerid);
			
 
				-			kindstr = "mpi_ms";
			
 
				-			arch.devices[0].type = STARPU_MPI_MS_WORKER;
			
 
				-			arch.devices[0].devid = devid;
			
 
				-			arch.devices[0].ncores = 1;
			
 
				-			break;
			
 
				+	enum starpu_worker_archtype archtype = _STARPU_FUT_KEY_WORKER(ev->param[0]);
			
 
				+	STARPU_ASSERT(archtype >= 0 && archtype < STARPU_NARCH);
			
 
				+
			
 
				+	kindstr = starpu_worker_get_type_as_string(archtype);
			
 
				+	arch.devices[0].type = archtype;
			
 
				+	arch.devices[0].devid = (archtype == STARPU_CPU_WORKER) ? 0 : devid; /* CPUs stay device 0, accelerators keep their id, as before */
			
 
				+	arch.devices[0].ncores = 1;
			
 
				+
			
 
				+	if (archtype == STARPU_CPU_WORKER)
			
 
				+		set_next_cpu_worker_color(workerid);
			
 
				+	else
			
 
				+		set_next_accel_worker_color(workerid);
			
 
				 
			
 
				-		default:
			
 
				-			STARPU_ABORT();
			
 
				-	}
			
 
				 	double now = get_event_time_stamp(ev, options);
			
 
				 
			
 
				 	if (out_paje_file)
			
--- a/src/drivers/cpu/driver_cpu.c
+++ b/src/drivers/cpu/driver_cpu.c
@@ -209,7 +209,7 @@ int _starpu_cpu_driver_init(struct _starpu_worker *cpu_worker)
 
				 {
			
 
				 	int devid = cpu_worker->devid;
			
 
				 
			
 
				-	_starpu_driver_start(cpu_worker, _STARPU_FUT_CPU_KEY, 1);
			
 
				+	_starpu_driver_start(cpu_worker, STARPU_CPU_WORKER, 1);
			
 
				 	snprintf(cpu_worker->name, sizeof(cpu_worker->name), "CPU %d", devid);
			
 
				 	snprintf(cpu_worker->short_name, sizeof(cpu_worker->short_name), "CPU %d", devid);
			
 
				 	starpu_pthread_setname(cpu_worker->short_name);
			
@@ -436,7 +436,7 @@ int _starpu_cpu_driver_deinit(struct _starpu_worker *cpu_worker)
 
				 	_starpu_free_all_automatically_allocated_buffers(memnode);
			
 
				 
			
 
				 	cpu_worker->worker_is_initialized = 0;
			
 
				-	_STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_CPU_KEY);
			
 
				+	_STARPU_TRACE_WORKER_DEINIT_END(STARPU_CPU_WORKER);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -684,12 +684,12 @@ int _starpu_cuda_driver_init(struct _starpu_worker_set *worker_set)
 
				 	int lastdevid = -1;
			
 
				 	unsigned i;
			
 
				 
			
 
				-	_starpu_driver_start(worker0, _STARPU_FUT_CUDA_KEY, 0);
			
 
				+	_starpu_driver_start(worker0, STARPU_CUDA_WORKER, 0);
			
 
				 	_starpu_set_local_worker_set_key(worker_set);
			
 
				 
			
 
				 #ifdef STARPU_USE_FXT
			
 
				 	for (i = 1; i < worker_set->nworkers; i++)
			
 
				-		_starpu_worker_start(&worker_set->workers[i], _STARPU_FUT_CUDA_KEY, 0);
			
 
				+		_starpu_worker_start(&worker_set->workers[i], STARPU_CUDA_WORKER, 0);
			
 
				 #endif
			
 
				 
			
 
				 	for (i = 0; i < worker_set->nworkers; i++)
			
@@ -1067,7 +1067,7 @@ int _starpu_cuda_driver_deinit(struct _starpu_worker_set *worker_set)
 
				 	}
			
 
				 
			
 
				 	worker_set->workers[0].worker_is_initialized = 0;
			
 
				-	_STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_CUDA_KEY);
			
 
				+	_STARPU_TRACE_WORKER_DEINIT_END(STARPU_CUDA_WORKER);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
--- a/src/drivers/max/driver_fpga.c
+++ b/src/drivers/max/driver_fpga.c
@@ -208,7 +208,7 @@ int _starpu_fpga_driver_init(struct _starpu_worker *worker)
 
				 {
			
 
				 	int devid = worker->devid;
			
 
				 	//fpga_msg("successful till here");
			
 
				-	_starpu_driver_start(worker, _STARPU_FUT_CPU_KEY, 1);
			
 
				+	_starpu_driver_start(worker, STARPU_FPGA_WORKER, 1);
			
 
				 	/* FIXME: when we have NUMA support, properly turn node number into NUMA node number */
			
 
				 	// TODO: drop test when we allocated a memory node for fpga
			
 
				 	if (worker->memory_node != STARPU_MAIN_RAM)
			
@@ -394,7 +394,7 @@ int _starpu_fpga_driver_deinit(struct _starpu_worker *fpga_worker)
 
				 	_starpu_free_all_automatically_allocated_buffers(memnode);
			
 
				 
			
 
				 	fpga_worker->worker_is_initialized = 0;
			
 
				-	_STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_CPU_KEY);
			
 
				+	_STARPU_TRACE_WORKER_DEINIT_END(STARPU_FPGA_WORKER);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
--- a/src/drivers/mic/driver_mic_source.c
+++ b/src/drivers/mic/driver_mic_source.c
@@ -521,10 +521,10 @@ void *_starpu_mic_src_worker(void *arg)
 
				 
			
 
				 	/* unsigned memnode = baseworker->memory_node; */
			
 
				 
			
 
				-	_starpu_driver_start(baseworker, _STARPU_FUT_MIC_KEY, 0);
			
 
				+	_starpu_driver_start(baseworker, STARPU_MIC_WORKER, 0);
			
 
				 #ifdef STARPU_USE_FXT
			
 
				 	for (i = 1; i < worker_set->nworkers; i++)
			
 
				-		_starpu_worker_start(&worker_set->workers[i], _STARPU_FUT_MIC_KEY, 0);
			
 
				+		_starpu_worker_start(&worker_set->workers[i], STARPU_MIC_WORKER, 0);
			
 
				 #endif
			
 
				 
			
 
				 	// Current task for a thread managing a worker set has no sense.
			
--- a/src/drivers/mpi/driver_mpi_source.c
+++ b/src/drivers/mpi/driver_mpi_source.c
@@ -310,11 +310,11 @@ void *_starpu_mpi_src_worker(void *arg)
 
				 
			
 
				                 /* unsigned memnode = baseworker->memory_node; */
			
 
				 
			
 
				-                _starpu_driver_start(baseworker, _STARPU_FUT_MPI_KEY, 0);
			
 
				+                _starpu_driver_start(baseworker, STARPU_MPI_MS_WORKER, 0);
			
 
				 
			
 
				 #ifdef STARPU_USE_FXT
			
 
				                 for (i = 1; i < worker_set->nworkers; i++)
			
 
				-                        _starpu_worker_start(&worker_set->workers[i], _STARPU_FUT_MPI_KEY, 0);
			
 
				+                        _starpu_worker_start(&worker_set->workers[i], STARPU_MPI_MS_WORKER, 0);
			
 
				 #endif
			
 
				 
			
 
				                 // Current task for a thread managing a worker set has no sense.
			
--- a/src/drivers/opencl/driver_opencl.c
+++ b/src/drivers/opencl/driver_opencl.c
@@ -623,7 +623,7 @@ int _starpu_opencl_driver_init(struct _starpu_worker *worker)
 
				 {
			
 
				 	int devid = worker->devid;
			
 
				 
			
 
				-	_starpu_driver_start(worker, _STARPU_FUT_OPENCL_KEY, 0);
			
 
				+	_starpu_driver_start(worker, STARPU_OPENCL_WORKER, 0);
			
 
				 
			
 
				 	_starpu_opencl_init_context(devid);
			
 
				 
			
@@ -853,7 +853,7 @@ int _starpu_opencl_driver_deinit(struct _starpu_worker *worker)
 
				         _starpu_opencl_deinit_context(devid);
			
 
				 
			
 
				 	worker->worker_is_initialized = 0;
			
 
				-	_STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_OPENCL_KEY);
			
 
				+	_STARPU_TRACE_WORKER_DEINIT_END(STARPU_OPENCL_WORKER);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
--- a/src/sched_policies/heteroprio.c
+++ b/src/sched_policies/heteroprio.c
@@ -37,7 +37,7 @@
 
				 #define DBL_MAX __DBL_MAX__
			
 
				 #endif
			
 
				 
			
 
				-#define STARPU_NB_TYPES (STARPU_MAX_WORKER+1)
			
 
				+#define STARPU_NB_TYPES STARPU_NARCH
			
 
				 
			
 
				 /* A bucket corresponds to a Pair of priorities
			
 
				  * When a task is pushed with a priority X, it will be stored
			
@@ -112,7 +112,7 @@ struct _starpu_heteroprio_data
 
				 
			
 
				 static int starpu_heteroprio_types_to_arch(enum starpu_worker_archtype arch)
			
 
				 {
			
 
				-	if (arch > STARPU_MAX_WORKER)
			
 
				+	if (arch >= STARPU_NARCH)
			
 
				 		return 0;
			
 
				 	return STARPU_WORKER_TO_MASK(arch);
			
 
				 }
			
@@ -174,7 +174,7 @@ static inline void default_init_sched(unsigned sched_ctx_id)
 
				 	enum starpu_worker_archtype type;
			
 
				 
			
 
				 	// By default each type of devices uses 1 bucket and no slow factor
			
 
				-	for (type = 0; type <= STARPU_MAX_WORKER; type++)
			
 
				+	for (type = 0; type < STARPU_NARCH; type++)
			
 
				 		if (starpu_worker_get_count_by_type(type) > 0)
			
 
				 			starpu_heteroprio_set_nb_prios(sched_ctx_id, type, max_prio-min_prio+1);
			
 
				 
			
@@ -183,7 +183,7 @@ static inline void default_init_sched(unsigned sched_ctx_id)
 
				 	for(prio=min_prio ; prio<=max_prio ; prio++)
			
 
				 	{
			
 
				 		// By default each type of devices uses 1 bucket and no slow factor
			
 
				-		for (type = 0; type <= STARPU_MAX_WORKER; type++)
			
 
				+		for (type = 0; type < STARPU_NARCH; type++)
			
 
				 			if (starpu_worker_get_count_by_type(type) > 0)
			
 
				 				starpu_heteroprio_set_mapping(sched_ctx_id, type, prio, prio);
			
 
				 	}
			
--- a/src/sched_policies/modular_ez.c
+++ b/src/sched_policies/modular_ez.c
@@ -112,8 +112,8 @@ void starpu_sched_component_initialize_simple_schedulers(unsigned sched_ctx_id,
 
				 		nummaxids = starpu_worker_get_count() + starpu_combined_worker_get_count();
			
 
				 		if (starpu_memory_nodes_get_count() > nummaxids)
			
 
				 			nummaxids = starpu_memory_nodes_get_count();
			
 
				-		if (STARPU_ANY_WORKER > nummaxids)
			
 
				-			nummaxids = STARPU_ANY_WORKER;
			
 
				+		if (STARPU_NARCH > nummaxids)
			
 
				+			nummaxids = STARPU_NARCH;
			
 
				 
			
 
				 		if (sched == 0)
			
 
				 			decide_flags = flags & STARPU_SCHED_SIMPLE_DECIDE_MASK;
			
@@ -154,7 +154,7 @@ void starpu_sched_component_initialize_simple_schedulers(unsigned sched_ctx_id,
 
				 			/* Count available architecture types */
			
 
				 			enum starpu_worker_archtype type;
			
 
				 			nbelow = 0;
			
 
				-			for (type = STARPU_CPU_WORKER; type < STARPU_ANY_WORKER; type++)
			
 
				+			for (type = 0; type < STARPU_NARCH; type++)
			
 
				 			{
			
 
				 				if (starpu_worker_get_count_by_type(type))
			
 
				 				{
			
--- a/src/util/fstarpu.c
+++ b/src/util/fstarpu.c
@@ -81,6 +81,7 @@ static const intptr_t fstarpu_cuda_worker = STARPU_CUDA_WORKER;
 
				 static const intptr_t fstarpu_opencl_worker = STARPU_OPENCL_WORKER;
			
 
				 static const intptr_t fstarpu_mic_worker = STARPU_MIC_WORKER;
			
 
				 static const intptr_t fstarpu_any_worker = STARPU_ANY_WORKER;
			
 
				+static const intptr_t fstarpu_narch = STARPU_NARCH;
			
 
				 
			
 
				 static const intptr_t fstarpu_nmaxbufs = STARPU_NMAXBUFS;
			
 
				 
			
@@ -174,6 +175,7 @@ intptr_t fstarpu_get_constant(char *s)
 
				 	else if (!strcmp(s, "FSTARPU_OPENCL_WORKER"))	{ return fstarpu_opencl_worker; }
			
 
				 	else if (!strcmp(s, "FSTARPU_MIC_WORKER"))	{ return fstarpu_mic_worker; }
			
 
				 	else if (!strcmp(s, "FSTARPU_ANY_WORKER"))	{ return fstarpu_any_worker; }
			
 
				+	else if (!strcmp(s, "FSTARPU_NARCH"))	{ return fstarpu_narch; }
			
 
				 
			
 
				 	else if (!strcmp(s, "FSTARPU_NMAXBUFS"))	{ return fstarpu_nmaxbufs; }
			
 
				 
			
--- a/tools/dev/checker/rename_internal.sed
+++ b/tools/dev/checker/rename_internal.sed
@@ -26,9 +26,6 @@ s/\b_starpu_get_memory_node_description\b/_starpu_memory_node_get_description/g
 
				 
			
 
				 s/\bheft_policy\b/_starpu_sched_heft_policy/g
			
 
				 s/\bstruct starpu_priority_taskq_s\b/struct _starpu_priority_taskq/g
			
 
				-s/\bSTARPU_FUT_APPS_KEY\b/_STARPU_FUT_APPS_KEY/g
			
 
				-s/\bSTARPU_FUT_CPU_KEY\b/_STARPU_FUT_CPU_KEY/g
			
 
				-s/\bSTARPU_FUT_CUDA_KEY\b/_STARPU_FUT_CUDA_KEY/g
			
 
				 s/\bSTARPU_FUT_DATA_COPY\b/_STARPU_FUT_DATA_COPY/g
			
 
				 s/\bSTARPU_FUT_DO_PROBE3STR\b/_STARPU_FUT_DO_PROBE3STR/g
			
 
				 s/\bSTARPU_FUT_DO_PROBE4STR\b/_STARPU_FUT_DO_PROBE4STR/g
			
@@ -46,7 +43,6 @@ s/\bSTARPU_FUT_END_PUSH_OUTPUT\b/_STARPU_FUT_END_PUSH_OUTPUT/g
 
				 s/\bSTARPU_FUT_JOB_POP\b/_STARPU_FUT_JOB_POP/g
			
 
				 s/\bSTARPU_FUT_JOB_PUSH\b/_STARPU_FUT_JOB_PUSH/g
			
 
				 s/\bSTARPU_FUT_NEW_MEM_NODE\b/_STARPU_FUT_NEW_MEM_NODE/g
			
 
				-s/\bSTARPU_FUT_OPENCL_KEY\b/_STARPU_FUT_OPENCL_KEY/g
			
 
				 s/\bSTARPU_FUT_SET_PROFILING\b/_STARPU_FUT_SET_PROFILING/g
			
 
				 s/\bSTARPU_FUT_START_ALLOC\b/_STARPU_FUT_START_ALLOC/g
			
 
				 s/\bSTARPU_FUT_START_ALLOC_REUSE\b/_STARPU_FUT_START_ALLOC_REUSE/g