13 年之前 · 51b24a4efd
--- a/doc/chapters/advanced-examples.texi
+++ b/doc/chapters/advanced-examples.texi
@@ -430,7 +430,7 @@ needs to be called to destroy the dummy task afterwards. See
 
				 @node Theoretical lower bound on execution time
			
 
				 @section Theoretical lower bound on execution time
			
 
				 
			
 
				-For kernels with history-based performance models, StarPU can very easily provide a theoretical lower
			
 
				+For kernels with history-based performance models (and provided that they are completely calibrated), StarPU can very easily provide a theoretical lower
			
 
				 bound for the execution time of a whole set of tasks. See for
			
 
				 instance @code{examples/lu/lu_example.c}: before submitting tasks,
			
 
				 call @code{starpu_bound_start}, and after complete execution, call
			
@@ -1015,9 +1015,11 @@ be enabled by using the @code{cuda_opengl_interoperability} field of the
 
				 @code{starpu_conf} structure, and the driver loop has to be run by
			
 
				 the application, by using the @code{not_launched_drivers} field of
			
 
				 @code{starpu_conf} to prevent StarPU from running it in a separate thread, and
			
 
				-by using @code{starpu_driver_run} to run the loop. The @code{gl_interop} example
			
 
				-shows how it articulates in a simple case, where rendering is done in task
			
 
				-callbacks. TODO: provide glutIdleFunc alternative.
			
 
				+by using @code{starpu_driver_run} to run the loop. The @code{gl_interop} and
			
 
				+@code{gl_interop_idle} examples show how it articulates in a simple case, where
			
 
				+rendering is done in task callbacks. The former uses @code{glutMainLoopEvent}
			
 
				+to make GLUT progress from the StarPU driver loop, while the latter uses
			
 
				+@code{glutIdleFunc} to make StarPU progress from the GLUT main loop.
			
 
				 
			
 
				 Then, to use an OpenGL buffer as a CUDA data, StarPU simply needs to be given
			
 
				 the CUDA pointer at registration, for instance:
			
--- a/doc/chapters/basic-api.texi
+++ b/doc/chapters/basic-api.texi
@@ -1571,7 +1571,7 @@ submitted if it has not been properly initialized.
 
				 Initialize @var{task} with default values. This function is implicitly
			
 
				 called by @code{starpu_task_create}. By default, tasks initialized with
			
 
				 @code{starpu_task_init} must be deinitialized explicitly with
			
 
				-@code{starpu_task_deinit}. Tasks can also be initialized statically,
			
 
				+@code{starpu_task_clean}. Tasks can also be initialized statically,
			
 
				 using @code{STARPU_TASK_INITIALIZER} defined below.
			
 
				 @end deftypefun
			
 
				 
			
@@ -1592,11 +1592,14 @@ by the task have to be freed by calling
 
				 @code{starpu_task_destroy}.
			
 
				 @end deftypefun
			
 
				 
			
 
				-@deftypefun void starpu_task_deinit ({struct starpu_task} *@var{task})
			
 
				+@deftypefun void starpu_task_clean ({struct starpu_task} *@var{task})
			
 
				 Release all the structures automatically allocated to execute @var{task}, but
			
 
				-not the task structure itself. It is thus useful for statically allocated tasks
			
 
				-for instance.  It is called automatically by @code{starpu_task_destroy}.  It
			
 
				-has to be called only after explicitly waiting for the task or after
			
 
				+not the task structure itself; values set by the user remain unchanged.
			
 
				+It is thus useful for statically allocated tasks for instance.
			
 
				+It is also useful when the user wants to execute the same operation several
			
 
				+times with as little overhead as possible.
			
 
				+It is called automatically by @code{starpu_task_destroy}.
			
 
				+It has to be called only after explicitly waiting for the task or after
			
 
				 @code{starpu_shutdown} (waiting for the callback is not enough, since starpu
			
 
				 still manipulates the task after calling the callback).
			
 
				 @end deftypefun
			
--- a/doc/chapters/perf-feedback.texi
+++ b/doc/chapters/perf-feedback.texi
@@ -415,7 +415,7 @@ Print the DAG that was recorded
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun void starpu_bound_compute ({double *}@var{res}, {double *}@var{integer_res}, int @var{integer})
			
 
				-Get theoretical upper bound (in ms) (needs glpk support detected by @code{configure} script)
			
 
				+Get theoretical upper bound (in ms) (needs glpk support detected by @code{configure} script). It returns 0 if some performance models are not calibrated.
			
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun void starpu_bound_print_lp ({FILE *}@var{output})
			
--- a/doc/chapters/perf-optimization.texi
+++ b/doc/chapters/perf-optimization.texi
@@ -275,7 +275,7 @@ void feed(void) @{
 
				 	starpu_vector_data_register(&handle, -1, 0, measure->size, sizeof(float));
			
 
				 	task.handles[0] = handle;
			
 
				 	starpu_perfmodel_update_history(&perf_model, &task, STARPU_CUDA_DEFAULT + measure->cudadev, 0, measure->implementation, measure->time);
			
 
				-	starpu_task_deinit(&task);
			
 
				+	starpu_task_clean(&task);
			
 
				 	starpu_data_unregister(handle);
			
 
				     @}
			
 
				 @}
			
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -844,13 +844,20 @@ endif
 
				 
			
 
				 if HAVE_OPENGL
			
 
				 examplebin_PROGRAMS +=				\
			
 
				-	gl_interop/gl_interop
			
 
				+	gl_interop/gl_interop			\
			
 
				+	gl_interop/gl_interop_idle
			
 
				 
			
 
				 gl_interop_gl_interop_SOURCES =			\
			
 
				 	gl_interop/gl_interop.c
			
 
				 
			
 
				 gl_interop_gl_interop_LDADD =			\
			
 
				 	$(STARPU_OPENGL_RENDER_LDFLAGS)
			
 
				+
			
 
				+gl_interop_gl_interop_idle_SOURCES =		\
			
 
				+	gl_interop/gl_interop_idle.c
			
 
				+
			
 
				+gl_interop_gl_interop_idle_LDADD =		\
			
 
				+	$(STARPU_OPENGL_RENDER_LDFLAGS)
			
 
				 endif
			
 
				 
			
 
				 ####################
			
--- a/examples/gl_interop/gl_interop.c
+++ b/examples/gl_interop/gl_interop.c
@@ -118,7 +118,7 @@ int main(int argc, char **argv)
 
				 	ret = starpu_task_submit(task);
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				 
			
 
				-	/* And run the driver, which will run the task */
			
 
				+	/* And run the driver inside main, which will run the task */
			
 
				 	printf("running the driver\n");
			
 
				 	starpu_driver_run(&drivers[0]);
			
 
				 	printf("finished running the driver\n");
			
--- a/examples/gl_interop/gl_interop_idle.c
+++ b/examples/gl_interop/gl_interop_idle.c
@@ -0,0 +1,152 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012 Université de Bordeaux 1
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * This example demonstrates how to use StarPU combined with OpenGL rendering,
			
 
				+ * which needs:
			
 
				+ *
			
 
				+ * - initializing GLUT first,
			
 
				+ * - enabling it at initialization,
			
 
				+ * - running the corresponding CUDA worker in the GLUT thread (here, the main
			
 
				+ *   thread).
			
 
				+ *
			
 
				+ * The difference with gl_interop.c is that this version runs StarPU Tasks in
			
 
				+ * the glut idle handler.
			
 
				+ */
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <unistd.h>
			
 
				+#include <GL/glut.h>
			
 
				+
			
 
				+void dummy(void *buffers[], void *cl_arg) /* CUDA codelet body: zero-fills the vector found in buffers[0]; cl_arg is not used */
			
 
				+{
			
 
				+	float *v = (float *) STARPU_VECTOR_GET_PTR(buffers[0]); /* pointer to the vector data, treated as floats */
			
 
				+
			
 
				+	printf("Codelet running\n");
			
 
				+	cudaMemset(v, 0, STARPU_VECTOR_GET_NX(buffers[0]) * sizeof(float)); /* byte count = number of elements * sizeof(float) */
			
 
				+	printf("Codelet done\n");
			
 
				+}
			
 
				+
			
 
				+struct starpu_codelet cl = { /* codelet submitted by main(): one buffer, executed by dummy() */
			
 
				+	.where = STARPU_CUDA, /* restricted to CUDA workers */
			
 
				+	.cuda_funcs = { dummy, NULL }, /* NULL-terminated list of CUDA implementations */
			
 
				+	.nbuffers = 1,
			
 
				+	.modes = { STARPU_W }, /* the single buffer is accessed in write mode */
			
 
				+};
			
 
				+
			
 
				+void foo(void) { /* empty function; NOTE(review): not referenced anywhere in the visible code of this example - confirm whether it is still needed */
			
 
				+}
			
 
				+
			
 
				+void display(float i) { /* renders one frame: a white line from (-i, -i) to (i, i) on a cleared color buffer */
			
 
				+	glClear(GL_COLOR_BUFFER_BIT);
			
 
				+	glColor3f(1, 1, 1); /* white */
			
 
				+	glBegin(GL_LINES);
			
 
				+	glVertex2f(-i, -i);
			
 
				+	glVertex2f(i, i);
			
 
				+	glEnd();
			
 
				+	glFinish(); /* wait for the drawing commands to complete before returning */
			
 
				+	glutPostRedisplay();
			
 
				+}
			
 
				+
			
 
				+static int cuda_devices[] = { 0 }; /* passed to StarPU as conf.cuda_opengl_interoperability in main() */
			
 
				+static struct starpu_driver drivers[] = { /* passed as conf.not_launched_drivers in main(); drivers[0] is driven by this program itself */
			
 
				+	{ .type = STARPU_CUDA_WORKER } /* id.cuda_id is filled in by main() */
			
 
				+};
			
 
				+
			
 
				+void callback_func(void *foo) { /* task callback: renders a short animation, then shuts StarPU down and exits the process; foo is unused */
			
 
				+	printf("Callback running, rendering\n");
			
 
				+	float i = 1.;
			
 
				+	while (i > 0) { /* roughly ten frames, one every 100 ms */
			
 
				+		usleep(100000);
			
 
				+		display(i);
			
 
				+		i -= 0.1;
			
 
				+	}
			
 
				+	printf("rendering done\n");
			
 
				+
			
 
				+	/* This was the last submitted task: ask the drivers to terminate */
			
 
				+	starpu_drivers_request_termination();
			
 
				+
			
 
				+	/* And terminate StarPU */
			
 
				+	starpu_driver_deinit(&drivers[0]); /* counterpart of the starpu_driver_init() call made in main() */
			
 
				+	starpu_shutdown();
			
 
				+	exit(0); /* leaves the process from inside the callback; control never returns to the caller */
			
 
				+}
			
 
				+
			
 
				+static void idle(void) /* GLUT idle handler (registered with glutIdleFunc in main): runs one iteration of the CUDA driver */
			
 
				+{
			
 
				+	starpu_driver_run_once(&drivers[0]);
			
 
				+}
			
 
				+
			
 
				+int main(int argc, char **argv) /* sets up GLUT and StarPU, submits one CUDA task and drives the CUDA worker from the GLUT idle handler */
			
 
				+{
			
 
				+#if !(defined(STARPU_USE_CUDA) && defined(STARPU_OPENGL_RENDER))
			
 
				+	return 77; /* nothing to run without both CUDA and OpenGL rendering support */
			
 
				+#else
			
 
				+	struct starpu_conf conf;
			
 
				+	int ret;
			
 
				+	struct starpu_task *task;
			
 
				+	starpu_data_handle_t handle;
			
 
				+	int cuda_device = 0;
			
 
				+
			
 
				+	cuda_devices[0] = cuda_device;
			
 
				+	drivers[0].id.cuda_id = cuda_device;
			
 
				+
			
 
				+	glutInit(&argc, argv); /* GLUT is initialized before StarPU */
			
 
				+	glutInitDisplayMode (GLUT_SINGLE | GLUT_RGB);
			
 
				+	glutInitWindowPosition(0, 0);
			
 
				+	glutInitWindowSize(300,200);
			
 
				+	glutCreateWindow("StarPU OpenGL interoperability test");
			
 
				+	glClearColor (0.5, 0.5, 0.5, 0.0);
			
 
				+
			
 
				+	/* Enable OpenGL interoperability */
			
 
				+	starpu_conf_init(&conf);
			
 
				+	conf.ncuda = 1;
			
 
				+	conf.ncpus = 0;
			
 
				+	conf.nopencl = 0;
			
 
				+	conf.cuda_opengl_interoperability = cuda_devices;
			
 
				+	conf.n_cuda_opengl_interoperability = sizeof(cuda_devices) / sizeof(*cuda_devices);
			
 
				+	conf.not_launched_drivers = drivers; /* StarPU must not start this driver itself: it is run from this thread below */
			
 
				+	conf.n_not_launched_drivers = sizeof(drivers) / sizeof(*drivers);
			
 
				+	ret = starpu_init(&conf);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
			
 
				+
			
 
				+	starpu_vector_data_register(&handle, -1, 0, 10, sizeof(float)); /* 10-element float vector */
			
 
				+
			
 
				+	/* Submit just one dumb task */
			
 
				+	task = starpu_task_create();
			
 
				+	task->cl = &cl;
			
 
				+	task->handles[0] = handle;
			
 
				+	task->callback_func = callback_func;
			
 
				+	task->callback_arg = NULL;
			
 
				+	ret = starpu_task_submit(task);
			
 
				+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
			
 
				+
			
 
				+	/* Drive the CUDA worker from the GLUT main loop, which will run the task */
			
 
				+	printf("running the driver\n");
			
 
				+	/* Initialize it */
			
 
				+	starpu_driver_init(&drivers[0]);
			
 
				+	/* Register driver loop content as idle handler */
			
 
				+	glutIdleFunc(idle);
			
 
				+	/* Now run the glut loop */
			
 
				+	glutMainLoop();
			
 
				+	starpu_driver_deinit(&drivers[0]); /* the driver was initialized above and run from idle(): only deinitialize it here instead of running it again */
			
 
				+	printf("finished running the driver\n");
			
 
				+
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	return 0;
			
 
				+#endif
			
 
				+}
			
--- a/include/starpu_deprecated_api.h
+++ b/include/starpu_deprecated_api.h
@@ -46,13 +46,15 @@ typedef struct starpu_multiformat_interface starpu_multiformat_interface_t;
 
				 
			
 
				 typedef struct starpu_buffer_descr starpu_buffer_descr;
			
 
				 typedef struct starpu_codelet starpu_codelet;
			
 
				+typedef struct starpu_codelet starpu_codelet_t;
			
 
				 typedef enum starpu_access_mode starpu_access_mode;
			
 
				 
			
 
				 #define starpu_print_bus_bandwidth     starpu_bus_print_bandwidth
			
 
				 #define starpu_get_handle_interface_id starpu_handle_get_interface_id
			
 
				 #define starpu_get_current_task        starpu_task_get_current
			
 
				 #define starpu_unpack_cl_args          starpu_codelet_unpack_args
			
 
				-#define starpu_pack_cl_args            starpu_codelet_pack_args
			
 
				+#define starpu_pack_cl_args   	       starpu_codelet_pack_args
			
 
				+#define starpu_task_deinit	       starpu_task_clean
			
 
				 
			
 
				 #ifdef __cplusplus
			
 
				 }
			
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -193,6 +193,12 @@ struct starpu_task
 
				 	 * by hand (without starpu_task_create), this field should be set to
			
 
				 	 * NULL. */
			
 
				 	void *starpu_private;
			
 
				+
			
 
				+	/* the magic field is set when initialising the task.
			
 
				+	 * starpu_task_submit will fail if the field does not have the
			
 
				+	 * right value. This will hence avoid submitting tasks which
			
 
				+	 * have not been properly initialised.
			
 
				+	 */
			
 
				 	int magic;
			
 
				 
			
 
				 	/* Scheduling context */
			
@@ -285,9 +291,11 @@ void starpu_task_init(struct starpu_task *task);
 
				 
			
 
				 /* Release all the structures automatically allocated to execute the task. This
			
 
				  * is called implicitely by starpu_task_destroy, but the task structure itself
			
 
				- * is not freed. This should be used for statically allocated tasks for
			
 
				- * instance. */
			
 
				-void starpu_task_deinit(struct starpu_task *task);
			
 
				+ * is not freed. Values previously set by the user remain unchanged.
			
 
				+ * This should be used for statically allocated tasks for instance.
			
 
				+ * It should also be used for submitting the same task several times.
			
 
				+ */
			
 
				+void starpu_task_clean(struct starpu_task *task);
			
 
				 
			
 
				 /* Allocate a task structure and initialize it with default values. Tasks
			
 
				  * allocated dynamically with starpu_task_create are automatically freed when
			
--- a/src/core/perfmodel/perfmodel.c
+++ b/src/core/perfmodel/perfmodel.c
@@ -292,7 +292,7 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 
				 		handle->refcnt--;
			
 
				 		handle->busy_count--;
			
 
				 		_starpu_spin_unlock(&handle->header_lock);
			
 
				-		starpu_task_deinit(conversion_task);
			
 
				+		starpu_task_clean(conversion_task);
			
 
				 		free(conversion_task);
			
 
				 	}
			
 
				 
			
--- a/src/core/perfmodel/perfmodel.h
+++ b/src/core/perfmodel/perfmodel.h
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  Télécom-SudParis
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -22,10 +22,8 @@
 
				 #include <common/config.h>
			
 
				 #include <starpu.h>
			
 
				 #include <starpu_perfmodel.h>
			
 
				-//#include <core/jobs.h>
			
 
				 #include <common/htable32.h>
			
 
				 #include <core/task_bundle.h>
			
 
				-//#include <core/workers.h>
			
 
				 #include <pthread.h>
			
 
				 #include <stdio.h>
			
 
				 
			
@@ -33,15 +31,6 @@ struct starpu_buffer_descr;
 
				 struct _starpu_job;
			
 
				 enum starpu_perf_archtype;
			
 
				 
			
 
				-///* File format */
			
 
				-//struct model_file_format
			
 
				-// {
			
 
				-//	unsigned ncore_entries;
			
 
				-//	unsigned ncuda_entries;
			
 
				-//	/* contains core entries, then cuda ones */
			
 
				-//	struct starpu_history_entry entries[];
			
 
				-//}
			
 
				-
			
 
				 void _starpu_get_perf_model_dir(char *path, size_t maxlen);
			
 
				 void _starpu_get_perf_model_dir_codelets(char *path, size_t maxlen);
			
 
				 void _starpu_get_perf_model_dir_bus(char *path, size_t maxlen);
			
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -54,8 +54,8 @@ struct dev_timing
 
				 	double timing_dtoh;
			
 
				 };
			
 
				 
			
 
				-static double bandwidth_matrix[STARPU_MAXNODES][STARPU_MAXNODES] = {{NAN}};
			
 
				-static double latency_matrix[STARPU_MAXNODES][STARPU_MAXNODES] = {{NAN}};
			
 
				+static double bandwidth_matrix[STARPU_MAXNODES][STARPU_MAXNODES];
			
 
				+static double latency_matrix[STARPU_MAXNODES][STARPU_MAXNODES];
			
 
				 static unsigned was_benchmarked = 0;
			
 
				 static unsigned ncpus = 0;
			
 
				 static int ncuda = 0;
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -1021,7 +1021,10 @@ double _starpu_history_based_job_expected_perf(struct starpu_perfmodel *model, e
 
				 
			
 
				 	if (isnan(exp) && !model->benchmarking)
			
 
				 	{
			
 
				-		_STARPU_DISP("Warning: model %s is not calibrated enough, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol);
			
 
				+		char archname[32];
			
 
				+
			
 
				+		starpu_perfmodel_get_arch_name(arch, archname, sizeof(archname), nimpl);
			
 
				+		_STARPU_DISP("Warning: model %s is not calibrated enough for %s, forcing calibration for this run. Use the STARPU_CALIBRATE environment variable to control this.\n", model->symbol, archname);
			
 
				 		_starpu_set_calibrate_flag(1);
			
 
				 		model->benchmarking = 1;
			
 
				 	}
			
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -77,8 +77,10 @@ void starpu_task_init(struct starpu_task *task)
 
				 }
			
 
				 
			
 
				 /* Free all the ressources allocated for a task, without deallocating the task
			
 
				- * structure itself (this is required for statically allocated tasks). */
			
 
				-void starpu_task_deinit(struct starpu_task *task)
			
 
				+ * structure itself (this is required for statically allocated tasks).
			
 
				+ * All values previously set by the user, like codelet and handles, remain
			
 
				+ * unchanged */
			
 
				+void starpu_task_clean(struct starpu_task *task)
			
 
				 {
			
 
				 	STARPU_ASSERT(task);
			
 
				 
			
@@ -135,7 +137,7 @@ void _starpu_task_destroy(struct starpu_task *task)
 
				    }
			
 
				    else
			
 
				    {
			
 
				-	   starpu_task_deinit(task);
			
 
				+	   starpu_task_clean(task);
			
 
				 	   /* TODO handle the case of task with detach = 1 and destroy = 1 */
			
 
				 	   /* TODO handle the case of non terminated tasks -> return -EINVAL */
			
 
				 	   free(task);
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -23,8 +23,8 @@
 
				 #include <core/debug.h>
			
 
				 #include <core/topology.h>
			
 
				 #include <drivers/cuda/driver_cuda.h>
			
 
				-#include <starpu_hash.h>
			
 
				 #include <profiling/profiling.h>
			
 
				+#include <common/uthash.h>
			
 
				 
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				 #include <hwloc.h>
			
@@ -50,7 +50,13 @@ static void _starpu_initialize_workers_bindid(struct _starpu_machine_config *con
 
				 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
			
 
				 #  ifdef STARPU_USE_CUDA
			
 
				 static void _starpu_initialize_workers_cuda_gpuid(struct _starpu_machine_config *config);
			
 
				-static struct starpu_htbl32_node *devices_using_cuda = NULL;
			
 
				+/* Entry in the `devices_using_cuda' hash table.  */
			
 
				+struct handle_entry
			
 
				+{
			
 
				+	UT_hash_handle hh; /* uthash bookkeeping; this is the handle name passed to HASH_ITER when the table is destroyed */
			
 
				+	unsigned gpuid; /* hash key: the device id (field named in HASH_ADD_INT) */
			
 
				+};
			
 
				+static struct handle_entry *devices_using_cuda; /* head of the uthash table; emptied and reset to NULL in _starpu_destroy_topology */
			
 
				 #  endif
			
 
				 #  ifdef STARPU_USE_OPENCL
			
 
				 static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_config *config);
			
@@ -92,8 +98,11 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
 
				                 int i;
			
 
				                 for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
			
 
				 		{
			
 
				-                        uint32_t key = starpu_crc32_be(config->topology.workers_opencl_gpuid[i], 0);
			
 
				-                        if (_starpu_htbl_search_32(devices_using_cuda, key) == NULL)
			
 
				+			struct handle_entry *entry;
			
 
				+			int devid = config->topology.workers_opencl_gpuid[i];
			
 
				+
			
 
				+			HASH_FIND_INT(devices_using_cuda, &devid, entry);
			
 
				+			if (entry == NULL)
			
 
				 			{
			
 
				                                 tmp[nb] = topology->workers_opencl_gpuid[i];
			
 
				                                 nb++;
			
@@ -105,18 +114,24 @@ static void _starpu_initialize_workers_opencl_gpuid(struct _starpu_machine_confi
 
				 #endif /* STARPU_USE_CUDA */
			
 
				         {
			
 
				                 // Detect identical devices
			
 
				-                struct starpu_htbl32_node *devices_already_used = NULL;
			
 
				+		struct handle_entry *devices_already_used = NULL;
			
 
				                 unsigned tmp[STARPU_NMAXWORKERS];
			
 
				                 unsigned nb=0;
			
 
				                 int i;
			
 
				 
			
 
				                 for(i=0 ; i<STARPU_NMAXWORKERS ; i++)
			
 
				 		{
			
 
				-                        uint32_t key = starpu_crc32_be(topology->workers_opencl_gpuid[i], 0);
			
 
				-                        if (_starpu_htbl_search_32(devices_already_used, key) == NULL)
			
 
				+			int devid = topology->workers_opencl_gpuid[i];
			
 
				+			struct handle_entry *entry;
			
 
				+			HASH_FIND_INT(devices_already_used, &devid, entry);
			
 
				+			if (entry == NULL)
			
 
				 			{
			
 
				-                                _starpu_htbl_insert_32(&devices_already_used, key, config);
			
 
				-                                tmp[nb] = topology->workers_opencl_gpuid[i];
			
 
				+				struct handle_entry *entry2;
			
 
				+				entry2 = (struct handle_entry *) malloc(sizeof(*entry2));
			
 
				+				STARPU_ASSERT(entry2 != NULL);
			
 
				+				entry2->gpuid = devid;
			
 
				+				HASH_ADD_INT(devices_already_used, gpuid, entry2);
			
 
				+                                tmp[nb] = devid;
			
 
				                                 nb ++;
			
 
				                         }
			
 
				                 }
			
@@ -332,8 +347,11 @@ static int _starpu_init_machine_config(struct _starpu_machine_config *config)
 
				 		_starpu_init_sched_ctx_for_worker(config->workers[topology->nworkers + cudagpu].workerid);
			
 
				 		config->worker_mask |= STARPU_CUDA;
			
 
				 
			
 
				-                uint32_t key = starpu_crc32_be(devid, 0);
			
 
				-                _starpu_htbl_insert_32(&devices_using_cuda, key, config);
			
 
				+		struct handle_entry *entry;
			
 
				+		entry = (struct handle_entry *) malloc(sizeof(*entry));
			
 
				+		STARPU_ASSERT(entry != NULL);
			
 
				+		entry->gpuid = devid;
			
 
				+		HASH_ADD_INT(devices_using_cuda, gpuid, entry);
			
 
				         }
			
 
				 
			
 
				 	topology->nworkers += topology->ncudagpus;
			
@@ -873,6 +891,12 @@ void _starpu_destroy_topology(struct _starpu_machine_config *config __attribute_
 
				 
			
 
				 	topology_is_initialized = 0;
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				+	struct handle_entry *entry, *tmp;
			
 
				+	HASH_ITER(hh, devices_using_cuda, entry, tmp)
			
 
				+	{
			
 
				+		HASH_DEL(devices_using_cuda, entry);
			
 
				+		free(entry);
			
 
				+	}
			
 
				 	devices_using_cuda = NULL;
			
 
				 #endif
			
 
				 #if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
			
--- a/src/debug/traces/starpu_fxt_dag.c
+++ b/src/debug/traces/starpu_fxt_dag.c
@@ -67,7 +67,7 @@ void _starpu_fxt_dag_terminate(void)
 
				 void _starpu_fxt_dag_add_tag(uint64_t tag, unsigned long job_id)
			
 
				 {
			
 
				 	if (out_file)
			
 
				-		fprintf(out_file, "\t \"tag_%llx\"->\"task_%llx\"->\"tag_%llx\"\n",
			
 
				+		fprintf(out_file, "\t \"tag_%llx\"->\"task_%llx\"->\"tag_%llx\" [style=dashed]\n",
			
 
				 			(unsigned long long)tag, (unsigned long long)job_id, (unsigned long long) tag);
			
 
				 }
			
 
				 
			
--- a/src/sched_policies/eager_central_policy.c
+++ b/src/sched_policies/eager_central_policy.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				- * Copyright (C) 2010-2011  Université de Bordeaux 1
			
 
				- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
			
 
				+ * Copyright (C) 2010-2012  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2010-2011  Centre National de la Recherche Scientifique
			
 
				  * Copyright (C) 2011  INRIA
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
@@ -55,6 +55,8 @@ static void eager_remove_workers(unsigned sched_ctx_id, int *workerids, unsigned
 
				 
			
 
				 static void initialize_eager_center_policy(unsigned sched_ctx_id) 
			
 
				 {
			
 
				+	if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: you are running the default eager scheduler, which does not include optimizations. Make sure to read the StarPU documentation about adding performance models in order to be able to use the heft or dmda schedulers instead.\n");
			
 
				+
			
 
				 	starpu_create_worker_collection_for_sched_ctx(sched_ctx_id, WORKER_LIST);
			
 
				 
			
 
				 	eager_center_policy_data *data = (eager_center_policy_data*)malloc(sizeof(eager_center_policy_data));
			
--- a/tests/main/regenerate.c
+++ b/tests/main/regenerate.c
@@ -121,7 +121,7 @@ int main(int argc, char **argv)
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	/* Cleanup the statically allocated tasks after shutdown, as StarPU is still working on it after the callback */
			
 
				-	starpu_task_deinit(&task);
			
 
				+	starpu_task_clean(&task);
			
 
				 
			
 
				 	return EXIT_SUCCESS;
			
 
				 
			
--- a/tests/main/static_restartable.c
+++ b/tests/main/static_restartable.c
@@ -103,7 +103,7 @@ int main(int argc, char **argv)
 
				 	FPRINTF(stderr, "Total: %f secs\n", timing/1000000);
			
 
				 	FPRINTF(stderr, "Per task: %f usecs\n", timing/ntasks);
			
 
				 
			
 
				-	starpu_task_deinit(&task);
			
 
				+	starpu_task_clean(&task);
			
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	return EXIT_SUCCESS;
			
--- a/tests/main/static_restartable_tag.c
+++ b/tests/main/static_restartable_tag.c
@@ -110,7 +110,7 @@ int main(int argc, char **argv)
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	/* Cleanup the statically allocated tasks after shutdown, as StarPU is still working on it after the callback */
			
 
				-	starpu_task_deinit(&task);
			
 
				+	starpu_task_clean(&task);
			
 
				 
			
 
				 	return EXIT_SUCCESS;
			
 
				 
			
--- a/tests/main/static_restartable_using_initializer.c
+++ b/tests/main/static_restartable_using_initializer.c
@@ -92,7 +92,7 @@ int main(int argc, char **argv)
 
				 		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait");
			
 
				 	}
			
 
				 
			
 
				-	starpu_task_deinit(&task);
			
 
				+	starpu_task_clean(&task);
			
 
				 	gettimeofday(&end, NULL);
			
 
				 
			
 
				 	timing = (double)((end.tv_sec - start.tv_sec)*1000000
			
--- a/tests/main/subgraph_repeat.c
+++ b/tests/main/subgraph_repeat.c
@@ -135,10 +135,10 @@ int main(int argc, char **argv)
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	/* Cleanup the statically allocated tasks after shutdown, as StarPU is still working on it after the callback */
			
 
				-	starpu_task_deinit(&taskA);
			
 
				-	starpu_task_deinit(&taskB);
			
 
				-	starpu_task_deinit(&taskC);
			
 
				-	starpu_task_deinit(&taskD);
			
 
				+	starpu_task_clean(&taskA);
			
 
				+	starpu_task_clean(&taskB);
			
 
				+	starpu_task_clean(&taskC);
			
 
				+	starpu_task_clean(&taskD);
			
 
				 
			
 
				 	return EXIT_SUCCESS;
			
 
				 
			
--- a/tests/main/subgraph_repeat_regenerate.c
+++ b/tests/main/subgraph_repeat_regenerate.c
@@ -141,10 +141,10 @@ int main(int argc, char **argv)
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	/* Cleanup the statically allocated tasks after shutdown, as StarPU is still working on it after the callback */
			
 
				-	starpu_task_deinit(&taskA);
			
 
				-	starpu_task_deinit(&taskB);
			
 
				-	starpu_task_deinit(&taskC);
			
 
				-	starpu_task_deinit(&taskD);
			
 
				+	starpu_task_clean(&taskA);
			
 
				+	starpu_task_clean(&taskB);
			
 
				+	starpu_task_clean(&taskC);
			
 
				+	starpu_task_clean(&taskD);
			
 
				 
			
 
				 	return EXIT_SUCCESS;
			
 
				 
			
--- a/tests/main/subgraph_repeat_regenerate_tag.c
+++ b/tests/main/subgraph_repeat_regenerate_tag.c
@@ -156,10 +156,10 @@ int main(int argc, char **argv)
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	/* Cleanup the statically allocated tasks after shutdown, as StarPU is still working on it after the callback */
			
 
				-	starpu_task_deinit(&taskA);
			
 
				-	starpu_task_deinit(&taskB);
			
 
				-	starpu_task_deinit(&taskC);
			
 
				-	starpu_task_deinit(&taskD);
			
 
				+	starpu_task_clean(&taskA);
			
 
				+	starpu_task_clean(&taskB);
			
 
				+	starpu_task_clean(&taskC);
			
 
				+	starpu_task_clean(&taskD);
			
 
				 
			
 
				 	return EXIT_SUCCESS;
			
 
				 
			
--- a/tests/main/subgraph_repeat_tag.c
+++ b/tests/main/subgraph_repeat_tag.c
@@ -141,10 +141,10 @@ int main(int argc, char **argv)
 
				 	starpu_shutdown();
			
 
				 
			
 
				 	/* Cleanup the statically allocated tasks after shutdown, as StarPU is still working on it after the callback */
			
 
				-	starpu_task_deinit(&taskA);
			
 
				-	starpu_task_deinit(&taskB);
			
 
				-	starpu_task_deinit(&taskC);
			
 
				-	starpu_task_deinit(&taskD);
			
 
				+	starpu_task_clean(&taskA);
			
 
				+	starpu_task_clean(&taskB);
			
 
				+	starpu_task_clean(&taskC);
			
 
				+	starpu_task_clean(&taskD);
			
 
				 
			
 
				 	return EXIT_SUCCESS;
			
 
				 
			
--- a/tests/main/wait_all_regenerable_tasks.c
+++ b/tests/main/wait_all_regenerable_tasks.c
@@ -106,7 +106,7 @@ int main(int argc, char **argv)
 
				 
			
 
				 	ret = starpu_task_wait_for_all();
			
 
				 	for (i = 0; i < K; i++)
			
 
				-		starpu_task_deinit(&task[i]);
			
 
				+		starpu_task_clean(&task[i]);
			
 
				 
			
 
				 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
			
 
				 
			
--- a/tests/perfmodels/feed.c
+++ b/tests/perfmodels/feed.c
@@ -75,7 +75,7 @@ int main(int argc, char **argv)
 
				 		/* Simulate Slow GPU */
			
 
				 		starpu_perfmodel_update_history(&model, &task, STARPU_CUDA_DEFAULT + 1, 0, 0, measured_slow);
			
 
				 		starpu_perfmodel_update_history(&nl_model, &task, STARPU_CUDA_DEFAULT + 1, 0, 0, measured_slow);
			
 
				-		starpu_task_deinit(&task);
			
 
				+		starpu_task_clean(&task);
			
 
				 		starpu_data_unregister(handle);
			
 
				 	}
			
 
				 
			
--- a/tools/dev/rename.sed
+++ b/tools/dev/rename.sed
@@ -17,6 +17,7 @@
 
				 s/\bstarpu_access_mode\b/enum starpu_access_mode/g
			
 
				 s/\bstruct starpu_codelet_t\b/struct starpu_codelet/g
			
 
				 s/\bstarpu_codelet\b/struct starpu_codelet/g
			
 
				+s/\bstarpu_codelet_t\b/struct starpu_codelet/g
			
 
				 s/\bstarpu_data_handle\b/starpu_data_handle_t/g
			
 
				 s/\bstarpu_block_interface_t\b/struct starpu_block_interface/g
			
 
				 s/\bstarpu_matrix_interface_t\b/struct starpu_matrix_interface/g
			
@@ -141,3 +142,4 @@ s/\bstarpu_get_handle_interface_id\b/starpu_handle_get_interface_id/g
 
				 s/\bstarpu_get_current_task\b/starpu_task_get_current/g
			
 
				 s/\bstarpu_pack_cl_args\b/starpu_codelet_pack_args/g
			
 
				 s/\bstarpu_unpack_cl_args\b/starpu_codelet_unpack_args/g
			
 
				+s/\bstarpu_task_deinit\b/starpu_task_clean/g
			
--- a/tools/starpu_calibrate_bus.c
+++ b/tools/starpu_calibrate_bus.c
@@ -78,7 +78,12 @@ int main(int argc, char **argv)
 
				 
			
 
				 	parse_args(argc, argv);
			
 
				 
			
 
				+	if (starpu_init(NULL) == -ENODEV)
			
 
				+		return 0; /* Nothing to calibrate, so this is a success :) */
			
 
				+
			
 
				 	starpu_force_bus_sampling();
			
 
				 
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				 	return 0;
			
 
				 }