Pārlūkot izejas kodu

merge from trunk

Olivier Aumage 8 gadi atpakaļ
vecāks
revīzija
19cb83f85b

+ 6 - 1
examples/heat/dw_sparse_cg.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010, 2011, 2015  Université de Bordeaux
+ * Copyright (C) 2009, 2010, 2011, 2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2016  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -186,6 +186,11 @@ void init_cg(struct cg_problem *problem)
 
 	/* launch the computation now */
 	ret = starpu_task_submit(task1);
+	if (STARPU_UNLIKELY(ret == -ENODEV))
+	{
+		FPRINTF(stderr, "No worker may execute this task\n");
+		exit(0);
+	}
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	ret = starpu_task_submit(task2);
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

+ 4 - 0
examples/lu/xlu_implicit_pivot.c

@@ -232,6 +232,10 @@ starpu_data_handle_t get_block_with_striding(starpu_data_handle_t *dataAp,
 
 int STARPU_LU(lu_decomposition_pivot)(TYPE *matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
 {
+	if (starpu_mic_worker_get_count() || starpu_scc_worker_get_count() || starpu_mpi_ms_worker_get_count())
+		/* These won't work with pivoting: we pass a pointer in cl_args */
+		return -ENODEV;
+
 	starpu_data_handle_t dataA;
 
 	/* monitor and partition the A matrix into blocks :

+ 4 - 0
examples/lu/xlu_pivot.c

@@ -413,6 +413,10 @@ starpu_data_handle_t get_block_with_no_striding(starpu_data_handle_t *dataAp, un
 
 int STARPU_LU(lu_decomposition_pivot_no_stride)(TYPE **matA, unsigned *ipiv, unsigned size, unsigned ld, unsigned nblocks)
 {
+	if (starpu_mic_worker_get_count() || starpu_scc_worker_get_count() || starpu_mpi_ms_worker_get_count())
+		/* These won't work with pivoting: we pass a pointer in cl_args */
+		return -ENODEV;
+
 	starpu_data_handle_t *dataAp = malloc(nblocks*nblocks*sizeof(starpu_data_handle_t));
 
 	/* monitor and partition the A matrix into blocks :

+ 27 - 0
include/starpu_thread.h

@@ -33,6 +33,7 @@
 #endif
 #elif !defined(_MSC_VER) || defined(BUILDING_STARPU)
 #include <pthread.h>
+#include <semaphore.h>
 #endif
 #include <stdint.h>
 
@@ -407,6 +408,32 @@ int starpu_pthread_wait_wait(starpu_pthread_wait_t *w);
 int starpu_pthread_wait_destroy(starpu_pthread_wait_t *w);
 #endif
 
+/*
+ * Encapsulation of the semaphore functions.
+ */
+
+#ifdef STARPU_SIMGRID
+
+typedef msg_sem_t starpu_sem_t;
+int starpu_sem_destroy(starpu_sem_t *);
+int starpu_sem_getvalue(starpu_sem_t *, int *);
+int starpu_sem_init(starpu_sem_t *, int, unsigned);
+int starpu_sem_post(starpu_sem_t *);
+int starpu_sem_trywait(starpu_sem_t *);
+int starpu_sem_wait(starpu_sem_t *);
+
+#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */
+
+typedef sem_t starpu_sem_t;
+#define starpu_sem_destroy sem_destroy
+#define starpu_sem_getvalue sem_getvalue
+#define starpu_sem_init sem_init
+#define starpu_sem_post sem_post
+int starpu_sem_trywait(starpu_sem_t *);
+int starpu_sem_wait(starpu_sem_t *);
+
+#endif
+
 #ifdef __cplusplus
 }
 #endif

+ 71 - 1
src/common/thread.c

@@ -19,6 +19,7 @@
 #include <core/simgrid.h>
 #include <core/workers.h>
 
+#include <errno.h>
 #include <limits.h>
 
 #ifdef STARPU_SIMGRID
@@ -529,7 +530,7 @@ int starpu_pthread_queue_destroy(starpu_pthread_queue_t *q)
 #endif /* STARPU_SIMGRID */
 
 #if (defined(STARPU_SIMGRID) && !defined(STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT)) || (!defined(STARPU_SIMGRID) && !defined(STARPU_HAVE_PTHREAD_BARRIER))
-int starpu_pthread_barrier_init(starpu_pthread_barrier_t *restrict barrier, const starpu_pthread_barrierattr_t *restrict attr, unsigned count)
+int starpu_pthread_barrier_init(starpu_pthread_barrier_t *restrict barrier, const starpu_pthread_barrierattr_t *restrict attr STARPU_ATTRIBUTE_UNUSED, unsigned count)
 {
 	int ret = starpu_pthread_mutex_init(&barrier->mutex, NULL);
 	if (!ret)
@@ -850,3 +851,72 @@ void _starpu_pthread_spin_do_unlock(starpu_pthread_spinlock_t *lock)
 #endif
 
 #endif /* defined(STARPU_SIMGRID) || (defined(STARPU_LINUX_SYS) && defined(STARPU_HAVE_XCHG)) || !defined(STARPU_HAVE_PTHREAD_SPIN_LOCK) */
+
+#ifdef STARPU_SIMGRID
+
+int starpu_sem_destroy(starpu_sem_t *sem) /* Simgrid build: destroy the MSG semaphore stored in *sem */
+{
+	MSG_sem_destroy(*sem);
+	return 0; /* this wrapper always reports success */
+}
+
+int starpu_sem_init(starpu_sem_t *sem, int pshared, unsigned value) /* Simgrid build: create a MSG semaphore from 'value' and store it in *sem */
+{
+	STARPU_ASSERT_MSG(pshared == 0, "pshared semaphores not supported under simgrid"); /* only pshared == 0 is accepted */
+	*sem = MSG_sem_init(value);
+	return 0; /* this wrapper always reports success */
+}
+
+int starpu_sem_post(starpu_sem_t *sem) /* Simgrid build: release the MSG semaphore stored in *sem */
+{
+	MSG_sem_release(*sem);
+	return 0; /* this wrapper always reports success */
+}
+
+int starpu_sem_wait(starpu_sem_t *sem) /* Simgrid build: acquire the MSG semaphore stored in *sem */
+{
+	MSG_sem_acquire(*sem);
+	return 0; /* this wrapper always reports success */
+}
+
+int starpu_sem_trywait(starpu_sem_t *sem) /* Simgrid build: acquire *sem only when MSG_sem_would_block() reports it would not block */
+{
+	if (MSG_sem_would_block(*sem))
+		return EAGAIN; /* EAGAIN is the return value itself here; NOTE(review): the non-simgrid variant returns sem_trywait()'s result instead -- confirm callers handle both conventions */
+	starpu_sem_wait(sem); /* acquisition goes through the regular wait path */
+	return 0;
+}
+
+int starpu_sem_getvalue(starpu_sem_t *sem, int *sval) /* Simgrid build: store in *sval the value reported by MSG_sem_get_capacity() for *sem */
+{
+#if SIMGRID_VERSION_MAJOR > 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR > 13)
+	*sval = MSG_sem_get_capacity(*sem);
+	return 0;
+#else
+	(void) sem; /* parameters are unused on this path */
+	(void) sval;
+	STARPU_ABORT_MSG("simgrid up to 3.13 did not have working MSG_sem_get_capacity"); /* not supported with these simgrid versions: abort instead of returning a value */
+#endif
+}
+
+#elif !defined(_MSC_VER) || defined(BUILDING_STARPU) /* !STARPU_SIMGRID */
+
+int starpu_sem_wait(starpu_sem_t *sem) /* Non-simgrid build: sem_wait() wrapper that retries when the call fails with errno == EINTR */
+{
+	int ret;
+	while((ret = sem_wait(sem)) == -1 && errno == EINTR) /* call again for as long as the failure is EINTR */
+		;
+
+	return ret; /* result of the last sem_wait() call */
+}
+
+int starpu_sem_trywait(starpu_sem_t *sem) /* Non-simgrid build: sem_trywait() wrapper that retries when the call fails with errno == EINTR */
+{
+	int ret;
+	while((ret = sem_trywait(sem)) == -1 && errno == EINTR) /* call again for as long as the failure is EINTR */
+		;
+	
+	return ret; /* result of the last sem_trywait() call; any other failure is passed through to the caller */
+}
+
+#endif

+ 5 - 5
src/core/sched_ctx.h

@@ -241,7 +241,7 @@ static inline struct _starpu_sched_ctx *_starpu_get_sched_ctx_struct(unsigned id
 static inline int _starpu_sched_ctx_check_write_locked(unsigned sched_ctx_id)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	return sched_ctx->lock_write_owner == starpu_pthread_self();
+	return starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self());
 }
 #define STARPU_SCHED_CTX_CHECK_LOCK(sched_ctx_id) STARPU_ASSERT(_starpu_sched_ctx_check_write_locked((sched_ctx_id)))
 
@@ -249,7 +249,7 @@ static inline void _starpu_sched_ctx_lock_write(unsigned sched_ctx_id)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner);
-	STARPU_ASSERT(sched_ctx->lock_write_owner != starpu_pthread_self());
+	STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self()));
 	STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner);
 	STARPU_PTHREAD_RWLOCK_WRLOCK(&sched_ctx->rwlock);
 	sched_ctx->lock_write_owner = starpu_pthread_self();
@@ -258,7 +258,7 @@ static inline void _starpu_sched_ctx_lock_write(unsigned sched_ctx_id)
 static inline void _starpu_sched_ctx_unlock_write(unsigned sched_ctx_id)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	STARPU_ASSERT(sched_ctx->lock_write_owner == starpu_pthread_self());
+	STARPU_ASSERT(starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self()));
 	sched_ctx->lock_write_owner = 0;
 	STARPU_PTHREAD_RWLOCK_UNLOCK(&sched_ctx->rwlock);
 }
@@ -267,7 +267,7 @@ static inline void _starpu_sched_ctx_lock_read(unsigned sched_ctx_id)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
 	STARPU_HG_DISABLE_CHECKING(sched_ctx->lock_write_owner);
-	STARPU_ASSERT(sched_ctx->lock_write_owner != starpu_pthread_self());
+	STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self()));
 	STARPU_HG_ENABLE_CHECKING(sched_ctx->lock_write_owner);
 	STARPU_PTHREAD_RWLOCK_RDLOCK(&sched_ctx->rwlock);
 }
@@ -275,7 +275,7 @@ static inline void _starpu_sched_ctx_lock_read(unsigned sched_ctx_id)
 static inline void _starpu_sched_ctx_unlock_read(unsigned sched_ctx_id)
 {
 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
-	STARPU_ASSERT(sched_ctx->lock_write_owner != starpu_pthread_self());
+	STARPU_ASSERT(!starpu_pthread_equal(sched_ctx->lock_write_owner, starpu_pthread_self()));
 	STARPU_PTHREAD_RWLOCK_UNLOCK(&sched_ctx->rwlock);
 }
 

+ 6 - 7
src/profiling/bound.c

@@ -209,11 +209,11 @@ static double** initialize_arch_duration(int maxdevid, unsigned* maxncore_table)
 static void initialize_duration(struct bound_task *task)
 {
 	struct _starpu_machine_config *conf = _starpu_get_machine_config();
-	task->duration[STARPU_CPU_WORKER] = initialize_arch_duration(1,&conf->topology.ncpus); 
-	task->duration[STARPU_CUDA_WORKER] = initialize_arch_duration(conf->topology.ncudagpus,NULL); 
-	task->duration[STARPU_OPENCL_WORKER] = initialize_arch_duration(conf->topology.nopenclgpus,NULL); 
-	task->duration[STARPU_MIC_WORKER] = initialize_arch_duration(conf->topology.nmicdevices,conf->topology.nmiccores); 
-	task->duration[STARPU_SCC_WORKER] = initialize_arch_duration(conf->topology.nsccdevices,NULL); 
+	task->duration[STARPU_CPU_WORKER] = initialize_arch_duration(1,&conf->topology.nhwcpus); 
+	task->duration[STARPU_CUDA_WORKER] = initialize_arch_duration(conf->topology.nhwcudagpus,NULL); 
+	task->duration[STARPU_OPENCL_WORKER] = initialize_arch_duration(conf->topology.nhwopenclgpus,NULL); 
+	task->duration[STARPU_MIC_WORKER] = initialize_arch_duration(conf->topology.nhwmicdevices,conf->topology.nmiccores); 
+	task->duration[STARPU_SCC_WORKER] = initialize_arch_duration(conf->topology.nhwscc,NULL); 
 }
 
 static struct starpu_perfmodel_device device =
@@ -278,8 +278,7 @@ void _starpu_bound_record(struct _starpu_job *j)
 	{
 		struct bound_task_pool *tp;
 
-		/* FIXME: bogus STARPU_CPU_WORKER parameter for arch pointer */
-		_starpu_compute_buffers_footprint(j->task->cl?j->task->cl->model:NULL, STARPU_CPU_WORKER, 0, j);
+		_starpu_compute_buffers_footprint(j->task->cl?j->task->cl->model:NULL, NULL, 0, j);
 
 		if (last && last->cl == j->task->cl && last->footprint == j->footprint)
 			tp = last;

+ 1 - 1
starpu.mk

@@ -16,7 +16,7 @@
 
 if STARPU_USE_MPI_MASTER_SLAVE
 MPI_LAUNCHER 			= $(MPIEXEC)  $(MPIEXEC_ARGS) -np 4
-MPI_RUN_ARGS			= STARPU_WORKERS_NOBIND=1 STARPU_NCPU=4
+MPI_RUN_ARGS			= STARPU_WORKERS_NOBIND=1 STARPU_NCPU=4 STARPU_NMPIMSTHREADS=4
 endif
 
 showcheck: