瀏覽代碼

merge trunk@7185:7200

Nathalie Furmento 12 年之前
父節點
當前提交
748f020801

+ 1 - 0
configure.ac

@@ -943,6 +943,7 @@ if test x$use_fxt = xyes; then
 	if test x$use_fxt_from_system = xno; then
 	if test x$use_fxt_from_system = xno; then
 		FXT_CFLAGS="-I$fxtdir/include/ "
 		FXT_CFLAGS="-I$fxtdir/include/ "
 		FXT_LDFLAGS="-L$fxtdir/lib/"
 		FXT_LDFLAGS="-L$fxtdir/lib/"
+		AC_ARG_VAR(FXT_LDFLAGS)
 		FXT_LIBS="-lfxt"
 		FXT_LIBS="-lfxt"
 	else
 	else
 	    PKG_CHECK_MODULES([FXT],  [fxt])
 	    PKG_CHECK_MODULES([FXT],  [fxt])

+ 4 - 0
doc/chapters/configuration.texi

@@ -344,6 +344,10 @@ The AMD implementation of OpenCL is known to
 fail when copying data asynchronously. When using this implementation,
 fail when copying data asynchronously. When using this implementation,
 it is therefore necessary to disable asynchronous data transfers.
 it is therefore necessary to disable asynchronous data transfers.
 
 
+@item @code{STARPU_DISABLE_CUDA_GPU_GPU_DIRECT}
+Disable direct CUDA transfers from GPU to GPU, and let CUDA copy through RAM
+instead. This makes it possible to measure the performance impact of GPU-Direct.
+
 @end table
 @end table
 
 
 >>>>>>> .merge-right.r7182
 >>>>>>> .merge-right.r7182

+ 1 - 1
examples/interface/complex_interface.c

@@ -134,7 +134,7 @@ static starpu_ssize_t complex_allocate_data_on_node(void *data_interface, uint32
 		}
 		}
 #endif
 #endif
 		default:
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 	}
 
 
 	if (fail)
 	if (fail)

+ 4 - 4
examples/stencil/stencil-tasks.c

@@ -66,7 +66,7 @@ static void create_task_save_local(unsigned iter, unsigned z, int dir, unsigned
 	if (ret)
 	if (ret)
 	{
 	{
 		fprintf(stderr, "Could not submit task save: %d\n", ret);
 		fprintf(stderr, "Could not submit task save: %d\n", ret);
-		STARPU_ASSERT(0);
+		STARPU_ABORT();
 	}
 	}
 }
 }
 
 
@@ -155,7 +155,7 @@ void create_task_save(unsigned iter, unsigned z, int dir, unsigned local_rank)
 		{ /* R(z) != local & R(z+d) != local We don't have
 		{ /* R(z) != local & R(z+d) != local We don't have
 			      the saved data and don't need it, we shouldn't
 			      the saved data and don't need it, we shouldn't
 			      even have been called! */
 			      even have been called! */
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 		}
 		}
 	}
 	}
 #else /* !STARPU_USE_MPI */
 #else /* !STARPU_USE_MPI */
@@ -208,7 +208,7 @@ void create_task_update(unsigned iter, unsigned z, unsigned local_rank)
 	if (ret)
 	if (ret)
 	{
 	{
 		fprintf(stderr, "Could not submit task update block: %d\n", ret);
 		fprintf(stderr, "Could not submit task update block: %d\n", ret);
-		STARPU_ASSERT(0);
+		STARPU_ABORT();
 	}
 	}
 }
 }
 
 
@@ -243,7 +243,7 @@ void create_start_task(int z, int dir)
 	if (ret)
 	if (ret)
 	{
 	{
 		fprintf(stderr, "Could not submit task initial wait: %d\n", ret);
 		fprintf(stderr, "Could not submit task initial wait: %d\n", ret);
-		STARPU_ASSERT(0);
+		STARPU_ABORT();
 	}
 	}
 }
 }
 
 

+ 4 - 4
include/starpu_util.h

@@ -58,18 +58,18 @@ extern "C"
 #  define STARPU_CHECK_RETURN_VALUE(err, message) {if (err != 0) { \
 #  define STARPU_CHECK_RETURN_VALUE(err, message) {if (err != 0) { \
 			char xmessage[256]; strerror_r(-err, xmessage, 256); \
 			char xmessage[256]; strerror_r(-err, xmessage, 256); \
 			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d:%s>\n", message, err, xmessage); \
 			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d:%s>\n", message, err, xmessage); \
-			STARPU_ASSERT(0); }}
+			STARPU_ABORT(); }}
 #  define STARPU_CHECK_RETURN_VALUE_IS(err, value, message) {if (err != value) { \
 #  define STARPU_CHECK_RETURN_VALUE_IS(err, value, message) {if (err != value) { \
 			char xmessage[256]; strerror_r(-err, xmessage, 256); \
 			char xmessage[256]; strerror_r(-err, xmessage, 256); \
 			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d:%s>\n", message, err, xmessage); \
 			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d:%s>\n", message, err, xmessage); \
-			STARPU_ASSERT(0); }}
+			STARPU_ABORT(); }}
 #else
 #else
 #  define STARPU_CHECK_RETURN_VALUE(err, message) {if (err != 0) {		\
 #  define STARPU_CHECK_RETURN_VALUE(err, message) {if (err != 0) {		\
 			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d>\n", message, err); \
 			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d>\n", message, err); \
-			STARPU_ASSERT(0); }}
+			STARPU_ABORT(); }}
 #  define STARPU_CHECK_RETURN_VALUE_IS(err, value, message) {if (err != value) { \
 #  define STARPU_CHECK_RETURN_VALUE_IS(err, value, message) {if (err != value) { \
 			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d>\n", message, err); \
 			fprintf(stderr, "StarPU function <%s> returned unexpected value: <%d>\n", message, err); \
-			STARPU_ASSERT(0); }}
+			STARPU_ABORT(); }}
 #endif /* STARPU_HAVE_STRERROR_R */
 #endif /* STARPU_HAVE_STRERROR_R */
 
 
 /* Return true (non-zero) if GCC version MAJ.MIN or later is being used
 /* Return true (non-zero) if GCC version MAJ.MIN or later is being used

+ 5 - 3
src/Makefile.am

@@ -23,6 +23,8 @@ EXTRA_DIST = dolib.c
 
 
 ldflags =
 ldflags =
 
 
+libstarpu_so_version = $(LIBSTARPU_INTERFACE_CURRENT):$(LIBSTARPU_INTERFACE_REVISION):$(LIBSTARPU_INTERFACE_AGE)
+
 if STARPU_HAVE_WINDOWS
 if STARPU_HAVE_WINDOWS
 
 
 LC_MESSAGES=C
 LC_MESSAGES=C
@@ -32,7 +34,7 @@ ldflags += -Xlinker --output-def -Xlinker .libs/libstarpu-@STARPU_EFFECTIVE_VERS
 
 
 if STARPU_HAVE_MS_LIB
 if STARPU_HAVE_MS_LIB
 .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.lib: libstarpu-@STARPU_EFFECTIVE_VERSION@.la dolib
 .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.lib: libstarpu-@STARPU_EFFECTIVE_VERSION@.la dolib
-	./dolib "$(STARPU_MS_LIB)" $(STARPU_MS_LIB_ARCH) .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.def libstarpu-0 .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.lib
+	./dolib "$(STARPU_MS_LIB)" $(STARPU_MS_LIB_ARCH) .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.def $(libstarpu_so_version) .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.lib
 all-local: .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.lib
 all-local: .libs/libstarpu-@STARPU_EFFECTIVE_VERSION@.lib
 endif STARPU_HAVE_MS_LIB
 endif STARPU_HAVE_MS_LIB
 
 
@@ -51,8 +53,8 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include/ -DBU
 
 
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS = $(GLOBAL_AM_CFLAGS) $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS)
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS = $(GLOBAL_AM_CFLAGS) $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS)
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = -lm $(HWLOC_LIBS) $(STARPU_CUDA_LDFLAGS) $(STARPU_OPENCL_LDFLAGS) $(FXT_LIBS) $(STARPU_GLPK_LDFLAGS)
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = -lm $(HWLOC_LIBS) $(STARPU_CUDA_LDFLAGS) $(STARPU_OPENCL_LDFLAGS) $(FXT_LIBS) $(STARPU_GLPK_LDFLAGS)
-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined									\
-  -version-info $(LIBSTARPU_INTERFACE_CURRENT):$(LIBSTARPU_INTERFACE_REVISION):$(LIBSTARPU_INTERFACE_AGE)
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) $(FXT_LDFLAGS) -no-undefined									\
+  -version-info $(libstarpu_so_version)
 
 
 noinst_HEADERS = 						\
 noinst_HEADERS = 						\
 	core/dependencies/data_concurrency.h			\
 	core/dependencies/data_concurrency.h			\

+ 2 - 0
src/common/fxt.c

@@ -52,6 +52,8 @@ long _starpu_gettid()
 	long tid;
 	long tid;
 	thr_self(&tid);
 	thr_self(&tid);
 	return tid;
 	return tid;
+#elif defined(__MINGW32__)
+	return (long) GetCurrentThread();
 #else
 #else
 	return (long) pthread_self();
 	return (long) pthread_self();
 #endif
 #endif

+ 2 - 2
src/common/utils.c

@@ -87,7 +87,7 @@ void _starpu_mkpath_and_check(const char *path, mode_t mode)
 		{
 		{
 			fprintf(stderr,"Error making StarPU directory %s:\n", path);
 			fprintf(stderr,"Error making StarPU directory %s:\n", path);
 			perror("mkdir");
 			perror("mkdir");
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 		}
 		}
 
 
 		/* make sure that it is actually a directory */
 		/* make sure that it is actually a directory */
@@ -96,7 +96,7 @@ void _starpu_mkpath_and_check(const char *path, mode_t mode)
 		if (!S_ISDIR(sb.st_mode))
 		if (!S_ISDIR(sb.st_mode))
 		{
 		{
 			fprintf(stderr,"Error: %s is not a directory:\n", path);
 			fprintf(stderr,"Error: %s is not a directory:\n", path);
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 		}
 		}
 	}
 	}
 }
 }

+ 1 - 1
src/core/perfmodel/perfmodel.c

@@ -281,7 +281,7 @@ double starpu_task_expected_conversion_time(struct starpu_task *task,
 			node = opencl_node;
 			node = opencl_node;
 #endif
 #endif
 		if (node == -EINVAL)
 		if (node == -EINVAL)
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 
 
 		if (!_starpu_handle_needs_conversion_task(handle, node))
 		if (!_starpu_handle_needs_conversion_task(handle, node))
 			continue;
 			continue;

+ 14 - 10
src/core/perfmodel/perfmodel_bus.c

@@ -191,11 +191,13 @@ static void measure_bandwidth_between_dev_and_dev_cuda(int src, int dst)
 	/* Initialize CUDA context on the source */
 	/* Initialize CUDA context on the source */
 	cudaSetDevice(src);
 	cudaSetDevice(src);
 
 
-	cures = cudaDeviceCanAccessPeer(&can, src, dst);
-	if (!cures && can) {
-		cures = cudaDeviceEnablePeerAccess(dst, 0);
-		if (!cures)
-			_STARPU_DISP("GPU-Direct %d -> %d\n", dst, src);
+	if (starpu_get_env_number("STARPU_DISABLE_CUDA_GPU_GPU_DIRECT") <= 0) {
+		cures = cudaDeviceCanAccessPeer(&can, src, dst);
+		if (!cures && can) {
+			cures = cudaDeviceEnablePeerAccess(dst, 0);
+			if (!cures)
+				_STARPU_DISP("GPU-Direct %d -> %d\n", dst, src);
+		}
 	}
 	}
 
 
 	/* Allocate a buffer on the device */
 	/* Allocate a buffer on the device */
@@ -207,11 +209,13 @@ static void measure_bandwidth_between_dev_and_dev_cuda(int src, int dst)
 	/* Initialize CUDA context on the destination */
 	/* Initialize CUDA context on the destination */
 	cudaSetDevice(dst);
 	cudaSetDevice(dst);
 
 
-	cures = cudaDeviceCanAccessPeer(&can, dst, src);
-	if (!cures && can) {
-		cures = cudaDeviceEnablePeerAccess(src, 0);
-		if (!cures)
-			_STARPU_DISP("GPU-Direct %d -> %d\n", src, dst);
+	if (starpu_get_env_number("STARPU_DISABLE_CUDA_GPU_GPU_DIRECT") <= 0) {
+		cures = cudaDeviceCanAccessPeer(&can, dst, src);
+		if (!cures && can) {
+			cures = cudaDeviceEnablePeerAccess(src, 0);
+			if (!cures)
+				_STARPU_DISP("GPU-Direct %d -> %d\n", src, dst);
+		}
 	}
 	}
 
 
 	/* Allocate a buffer on the device */
 	/* Allocate a buffer on the device */

+ 1 - 1
src/core/perfmodel/perfmodel_history.c

@@ -1162,7 +1162,7 @@ void _starpu_update_perfmodel_history(struct _starpu_job *j, struct starpu_perfm
 		if (f == NULL)
 		if (f == NULL)
 		{
 		{
 			_STARPU_DISP("Error <%s> when opening file <%s>\n", strerror(errno), per_arch_model->debug_path);
 			_STARPU_DISP("Error <%s> when opening file <%s>\n", strerror(errno), per_arch_model->debug_path);
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 		}
 		}
 
 
 		if (!j->footprint_is_computed)
 		if (!j->footprint_is_computed)

+ 3 - 3
src/core/sched_policy.c

@@ -422,7 +422,7 @@ struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
 		switch (starpu_node_get_kind(handle->mf_node))
 		switch (starpu_node_get_kind(handle->mf_node))
 		{
 		{
 		case STARPU_CPU_RAM:
 		case STARPU_CPU_RAM:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 		case STARPU_CUDA_RAM:
 		case STARPU_CUDA_RAM:
 		{
 		{
@@ -443,7 +443,7 @@ struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
 #endif
 #endif
 		default:
 		default:
 			fprintf(stderr, "Oops : %u\n", handle->mf_node);
 			fprintf(stderr, "Oops : %u\n", handle->mf_node);
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 		}
 		}
 		break;
 		break;
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
@@ -466,7 +466,7 @@ struct starpu_task *_starpu_create_conversion_task(starpu_data_handle_t handle,
 #endif
 #endif
 	case STARPU_SPU_LS: /* Not supported */
 	case STARPU_SPU_LS: /* Not supported */
 	default:
 	default:
-		STARPU_ASSERT(0);
+		STARPU_ABORT();
 	}
 	}
 
 
 	conversion_task->cl->modes[0] = STARPU_RW;
 	conversion_task->cl->modes[0] = STARPU_RW;

+ 3 - 3
src/core/task.c

@@ -767,7 +767,7 @@ _starpu_handle_needs_conversion_task(starpu_data_handle_t handle,
 					return 1;
 					return 1;
 				case STARPU_SPU_LS: /* Not supported */
 				case STARPU_SPU_LS: /* Not supported */
 				default:
 				default:
-					STARPU_ASSERT(0);
+					STARPU_ABORT();
 			}
 			}
 			break;
 			break;
 		case STARPU_CUDA_RAM:    /* Fall through */
 		case STARPU_CUDA_RAM:    /* Fall through */
@@ -781,12 +781,12 @@ _starpu_handle_needs_conversion_task(starpu_data_handle_t handle,
 					return 0;
 					return 0;
 				case STARPU_SPU_LS: /* Not supported */
 				case STARPU_SPU_LS: /* Not supported */
 				default:
 				default:
-					STARPU_ASSERT(0);
+					STARPU_ABORT();
 			}
 			}
 			break;
 			break;
 		case STARPU_SPU_LS:            /* Not supported */
 		case STARPU_SPU_LS:            /* Not supported */
 		default:
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 	}
 	/* that instruction should never be reached */
 	/* that instruction should never be reached */
 	return -EINVAL;
 	return -EINVAL;

+ 1 - 0
src/datawizard/coherency.c

@@ -197,6 +197,7 @@ static int link_supports_direct_transfers(starpu_data_handle_t handle, unsigned
 	}
 	}
 #endif
 #endif
 
 
+	/* Note: with CUDA, performance seems a bit better when issuing the transfer from the destination (tested without GPUDirect) */
 	if (worker_supports_direct_access(src_node, dst_node))
 	if (worker_supports_direct_access(src_node, dst_node))
 	{
 	{
 		*handling_node = dst_node;
 		*handling_node = dst_node;

+ 1 - 1
src/datawizard/interfaces/data_interface.c

@@ -556,7 +556,7 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 				case STARPU_CPU_RAM:      /* Impossible ! */
 				case STARPU_CPU_RAM:      /* Impossible ! */
 				case STARPU_SPU_LS:       /* Not supported */
 				case STARPU_SPU_LS:       /* Not supported */
 				default:
 				default:
-					STARPU_ASSERT(0);
+					STARPU_ABORT();
 			}
 			}
 			buffers[0] = format_interface;
 			buffers[0] = format_interface;
 
 

+ 5 - 5
src/datawizard/interfaces/multiformat_interface.c

@@ -127,7 +127,7 @@ static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, uint32_t
 			return multiformat_interface->opencl_ptr;
 			return multiformat_interface->opencl_ptr;
 #endif
 #endif
 		default:
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 	}
 	return NULL;
 	return NULL;
 }
 }
@@ -448,7 +448,7 @@ static int copy_cuda_common(void *src_interface, unsigned src_node __attribute__
 			break;
 			break;
 		}
 		}
 		default:
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 	}
 
 
 	return 0;
 	return 0;
@@ -514,7 +514,7 @@ static int copy_cuda_common_async(void *src_interface, unsigned src_node __attri
 			break;
 			break;
 		}
 		}
 		default:
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 	}
 
 
 	return 0;
 	return 0;
@@ -589,7 +589,7 @@ static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRI
 					     dst_interface, dst_node,
 					     dst_interface, dst_node,
 					     NULL);
 					     NULL);
 #else
 #else
-		STARPU_ASSERT(0);
+		STARPU_ABORT();
 #endif
 #endif
 	}
 	}
 }
 }
@@ -611,7 +611,7 @@ static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
 					     dst_interface, dst_node,
 					     dst_interface, dst_node,
 					     stream);
 					     stream);
 #else
 #else
-		STARPU_ASSERT(0);
+		STARPU_ABORT();
 #endif
 #endif
 	}
 	}
 }
 }

+ 22 - 31
src/debug/traces/starpu_fxt.c

@@ -16,6 +16,7 @@
 
 
 #include <starpu.h>
 #include <starpu.h>
 #include <common/config.h>
 #include <common/config.h>
+#include <common/uthash.h>
 
 
 #ifdef STARPU_USE_FXT
 #ifdef STARPU_USE_FXT
 #include "starpu_fxt.h"
 #include "starpu_fxt.h"
@@ -124,46 +125,41 @@ static float get_event_time_stamp(struct fxt_ev_64 *ev, struct starpu_fxt_option
 
 
 static int nworkers = 0;
 static int nworkers = 0;
 
 
+struct worker_entry
+{
+	UT_hash_handle hh;
+	unsigned long tid;
+	int workerid;
+} *worker_ids;
+
 static int register_worker_id(unsigned long tid)
 static int register_worker_id(unsigned long tid)
 {
 {
 	int workerid = nworkers++;
 	int workerid = nworkers++;
+	struct worker_entry *entry;
 
 
-	/* create a new key in the htable */
-	char *tidstr = malloc(16*sizeof(char));
-	sprintf(tidstr, "%lu", tid);
-
-	ENTRY item;
-		item.key = tidstr;
-		item.data = (void *)(uintptr_t)workerid;
-
-	ENTRY *res;
-	res = hsearch(item, FIND);
+	HASH_FIND(hh, worker_ids, &tid, sizeof(tid), entry);
 
 
 	/* only register a thread once */
 	/* only register a thread once */
-	STARPU_ASSERT(res == NULL);
+	STARPU_ASSERT(entry == NULL);
+
+	entry = malloc(sizeof(*entry));
+	entry->tid = tid;
+	entry->workerid = workerid;
 
 
-	res = hsearch(item, ENTER);
-	STARPU_ASSERT(res);
+	HASH_ADD(hh, worker_ids, tid, sizeof(tid), entry);
 
 
 	return workerid;
 	return workerid;
 }
 }
 
 
 static int find_worker_id(unsigned long tid)
 static int find_worker_id(unsigned long tid)
 {
 {
-	char tidstr[16];
-	sprintf(tidstr, "%lu", tid);
-
-	ENTRY item;
-		item.key = tidstr;
-		item.data = NULL;
-	ENTRY *res;
-	res = hsearch(item, FIND);
-	if (!res)
-		return -1;
+	struct worker_entry *entry;
 
 
-	int id = (uintptr_t)(res->data);
+	HASH_FIND(hh, worker_ids, &tid, sizeof(tid), entry);
+	if (!entry)
+		return -1;
 
 
-	return id;
+	return entry->workerid;
 }
 }
 
 
 static void update_accumulated_time(int worker, double sleep_time, double exec_time, double current_timestamp, int forceflush)
 static void update_accumulated_time(int worker, double sleep_time, double exec_time, double current_timestamp, int forceflush)
@@ -782,7 +778,7 @@ static void handle_mpi_barrier(struct fxt_ev_64 *ev, struct starpu_fxt_options *
 {
 {
 	int rank = ev->param[0];
 	int rank = ev->param[0];
 
 
-	STARPU_ASSERT(rank == options->file_rank);
+	STARPU_ASSERT(rank == options->file_rank || options->file_rank == -1);
 
 
 	/* Add an event in the trace */
 	/* Add an event in the trace */
 	if (out_paje_file)
 	if (out_paje_file)
@@ -872,9 +868,6 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 	fxt_blockev_t block;
 	fxt_blockev_t block;
 	block = fxt_blockev_enter(fut);
 	block = fxt_blockev_enter(fut);
 
 
-	/* create a htable to identify each worker(tid) */
-	hcreate(STARPU_NMAXWORKERS);
-
 	symbol_list = _starpu_symbol_name_list_new();
 	symbol_list = _starpu_symbol_name_list_new();
 	communication_list = _starpu_communication_list_new();
 	communication_list = _starpu_communication_list_new();
 
 
@@ -1081,8 +1074,6 @@ void starpu_fxt_parse_new_file(char *filename_in, struct starpu_fxt_options *opt
 		}
 		}
 	}
 	}
 
 
-	hdestroy();
-
 	/* Close the trace file */
 	/* Close the trace file */
 	if (close(fd_in))
 	if (close(fd_in))
 	{
 	{

+ 12 - 4
src/dolib.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010  Université de Bordeaux 1
+ * Copyright (C) 2010, 2012  Université de Bordeaux 1
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -22,8 +22,10 @@
 
 
 int main(int argc, char *argv[])
 int main(int argc, char *argv[])
 {
 {
-	char *prog, *arch, *def, *name, *lib;
+	char *prog, *arch, *def, *version, *lib;
 	char s[1024];
 	char s[1024];
+	char name[16];
+	int current, age, revision;
 
 
 	if (argc != 6)
 	if (argc != 6)
 	{
 	{
@@ -34,10 +36,16 @@ int main(int argc, char *argv[])
 	prog = argv[1];
 	prog = argv[1];
 	arch = argv[2];
 	arch = argv[2];
 	def = argv[3];
 	def = argv[3];
-	name = argv[4];
+	version = argv[4];
 	lib = argv[5];
 	lib = argv[5];
 
 
-	snprintf(s, sizeof(s), "\"%s\" /machine:%s /def:%s /name:%s /out:%s",
+	if (sscanf(version, "%d:%d:%d", &current, &revision, &age) != 3)
+		exit(EXIT_FAILURE);
+
+	_snprintf(name, sizeof(name), "libstarpu-%d", current - age);
+	printf("using soname %s\n", name);
+
+	_snprintf(s, sizeof(s), "\"%s\" /machine:%s /def:%s /name:%s /out:%s",
 		 prog, arch, def, name, lib);
 		 prog, arch, def, name, lib);
 	if (system(s))
 	if (system(s))
 	{
 	{

+ 15 - 13
src/drivers/cuda/driver_cuda.c

@@ -156,16 +156,18 @@ static void init_context(int devid)
 	starpu_cuda_set_device(devid);
 	starpu_cuda_set_device(devid);
 
 
 #ifdef HAVE_CUDA_MEMCPY_PEER
 #ifdef HAVE_CUDA_MEMCPY_PEER
-	int nworkers = starpu_worker_get_count();
-	for (workerid = 0; workerid < nworkers; workerid++) {
-		struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
-		if (worker->arch == STARPU_CUDA_WORKER && worker->devid != devid) {
-			int can;
-			cures = cudaDeviceCanAccessPeer(&can, devid, worker->devid);
-			if (!cures && can) {
-				cures = cudaDeviceEnablePeerAccess(worker->devid, 0);
-				if (cures)
-					_STARPU_DEBUG("GPU-Direct %d -> %d\n", worker->devid, devid);
+	if (starpu_get_env_number("STARPU_DISABLE_CUDA_GPU_GPU_DIRECT") <= 0) {
+		int nworkers = starpu_worker_get_count();
+		for (workerid = 0; workerid < nworkers; workerid++) {
+			struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);
+			if (worker->arch == STARPU_CUDA_WORKER && worker->devid != devid) {
+				int can;
+				cures = cudaDeviceCanAccessPeer(&can, devid, worker->devid);
+				if (!cures && can) {
+					cures = cudaDeviceEnablePeerAccess(worker->devid, 0);
+					if (!cures)
+						_STARPU_DEBUG("GPU-Direct %d -> %d\n", worker->devid, devid);
+				}
 			}
 			}
 		}
 		}
 	}
 	}
@@ -187,7 +189,7 @@ static void init_context(int devid)
 #ifdef HAVE_CUDA_MEMCPY_PEER
 #ifdef HAVE_CUDA_MEMCPY_PEER
 	if (props[devid].computeMode == cudaComputeModeExclusive) {
 	if (props[devid].computeMode == cudaComputeModeExclusive) {
 		fprintf(stderr, "CUDA is in EXCLUSIVE-THREAD mode, but StarPU was built with multithread GPU control support, please either ask your administrator to use EXCLUSIVE-PROCESS mode (which should really be fine), or reconfigure with --disable-cuda-memcpy-peer but that will disable the memcpy-peer optimizations\n");
 		fprintf(stderr, "CUDA is in EXCLUSIVE-THREAD mode, but StarPU was built with multithread GPU control support, please either ask your administrator to use EXCLUSIVE-PROCESS mode (which should really be fine), or reconfigure with --disable-cuda-memcpy-peer but that will disable the memcpy-peer optimizations\n");
-		STARPU_ASSERT(0);
+		STARPU_ABORT();
 	}
 	}
 #endif
 #endif
 
 
@@ -535,14 +537,14 @@ void starpu_cublas_report_error(const char *func, const char *file, int line, cu
 			break;
 			break;
 	}
 	}
 	fprintf(stderr, "oops in %s (%s:%u)... %d: %s \n", func, file, line, status, errormsg);
 	fprintf(stderr, "oops in %s (%s:%u)... %d: %s \n", func, file, line, status, errormsg);
-	STARPU_ASSERT(0);
+	STARPU_ABORT();
 }
 }
 
 
 void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status)
 void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status)
 {
 {
 	const char *errormsg = cudaGetErrorString(status);
 	const char *errormsg = cudaGetErrorString(status);
 	printf("oops in %s (%s:%u)... %d: %s \n", func, file, line, status, errormsg);
 	printf("oops in %s (%s:%u)... %d: %s \n", func, file, line, status, errormsg);
-	STARPU_ASSERT(0);
+	STARPU_ABORT();
 }
 }
 
 
 int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind)
 int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_ptr, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind)

+ 3 - 1
src/drivers/opencl/driver_opencl.c

@@ -208,6 +208,8 @@ cl_int starpu_opencl_allocate_memory(cl_mem *mem, size_t size, cl_mem_flags flag
 	clFinish(queues[worker->devid]);
 	clFinish(queues[worker->devid]);
 	if (err == CL_MEM_OBJECT_ALLOCATION_FAILURE)
 	if (err == CL_MEM_OBJECT_ALLOCATION_FAILURE)
 		return err;
 		return err;
+	if (err == CL_OUT_OF_RESOURCES)
+		return err;
 	if (err != CL_SUCCESS)
 	if (err != CL_SUCCESS)
 		STARPU_OPENCL_REPORT_ERROR(err);
 		STARPU_OPENCL_REPORT_ERROR(err);
 
 
@@ -539,7 +541,7 @@ int _starpu_opencl_driver_run_once(struct starpu_driver *d)
 				STARPU_ABORT();
 				STARPU_ABORT();
 				return 0;
 				return 0;
 			default:
 			default:
-				STARPU_ASSERT(0);
+				STARPU_ABORT();
 		}
 		}
 	}
 	}
 
 

+ 3 - 3
tests/datawizard/interfaces/test_interfaces.c

@@ -191,8 +191,8 @@ get_field(struct data_interface_test_summary *s, int async, enum operation op)
 	case OPENCL_TO_CPU:
 	case OPENCL_TO_CPU:
 		return async?&s->opencl_to_cpu_async:&s->opencl_to_cpu;
 		return async?&s->opencl_to_cpu_async:&s->opencl_to_cpu;
 #endif /* !STARPU_USE_OPENCL */
 #endif /* !STARPU_USE_OPENCL */
-		default:
-			STARPU_ASSERT(0);
+	default:
+		STARPU_ABORT();
 	}
 	}
 	/* that instruction should never be reached */
 	/* that instruction should never be reached */
 	return NULL;
 	return NULL;
@@ -222,7 +222,7 @@ set_field(struct data_interface_test_summary *s, int async,
 			*field = TASK_SUBMISSION_FAILURE;
 			*field = TASK_SUBMISSION_FAILURE;
 			break;
 			break;
 		default:
 		default:
-			STARPU_ASSERT(0);
+			STARPU_ABORT();
 	}
 	}
 }
 }