Procházet zdrojové kódy

Mac OS X does not provide anonymous (unnamed) semaphores, so we need to use
condition variables instead. To ease maintenance, we avoid using semaphores
wherever doing so does not affect the critical path, even on Linux.

Cédric Augonnet před 16 roky
rodič
revize
af4aae92b2

+ 21 - 8
src/core/jobs.c

@@ -51,7 +51,15 @@ job_t __attribute__((malloc)) job_create(struct starpu_task *task)
 	job->terminated = 0;
 
 	if (task->synchronous)
-		sem_init(&job->sync_sem, 0, 0);
+	{
+#ifdef __APPLE__ && __MACH__
+		pthread_mutex_init(&job->sync_mutex, NULL);
+		pthread_cond_init(&job->sync_cond, NULL);
+#else
+		if (sem_init(&job->sync_sem, 0, 0))
+			perror("sem_init");
+#endif
+	}
 
 	if (task->use_tag)
 		tag_declare(task->tag_id, job);
@@ -94,8 +102,14 @@ void handle_job_termination(job_t j)
 
 	if (task->synchronous)
 	{
+#ifdef __APPLE__ && __MACH__
+		pthread_mutex_lock(&j->sync_mutex);
+		pthread_cond_signal(&j->sync_cond);
+		pthread_mutex_unlock(&j->sync_mutex);
+#else
 		if (sem_post(&j->sync_sem))
 			perror("sem_post");
+#endif
 
 		/* as this is a synchronous task, we do not delete the job 
 		   structure which contains the j->sync_sem: we only liberate
@@ -111,8 +125,14 @@ void handle_job_termination(job_t j)
 static void block_sync_task(job_t j)
 {
 	{
+#ifdef __APPLE__ && __MACH__
+		pthread_mutex_lock(&j->sync_mutex);
+		pthread_cond_wait(&j->sync_cond, &j->sync_mutex);
+		pthread_mutex_unlock(&j->sync_mutex);
+#else
 		sem_wait(&j->sync_sem);
 		sem_destroy(&j->sync_sem);
+#endif
 
 		/* as this is a synchronous task, the liberation of the job
 		   structure was deferred */
@@ -164,13 +184,6 @@ int starpu_submit_task(struct starpu_task *task)
 	return ret;
 }
 
-//int submit_prio_job(job_t j)
-//{
-//	j->priority = MAX_PRIO;
-//	
-//	return submit_job(j);
-//}
-
 /* This function is supplied for convenience only, it is equivalent to setting
  * the proper flag and submitting the task with submit_task.
  * Note that this call is blocking, and will not make StarPU progress,

+ 8 - 0
src/core/jobs.h

@@ -55,7 +55,15 @@ typedef void (*callback)(void *);
 LIST_TYPE(job,
 	struct starpu_task *task;
 
+/* Mac OS X does not provide anonymous semaphores,
+   so we use condition variables instead */
+#ifdef __APPLE__ && __MACH__
+	pthread_mutex_t sync_mutex;
+	pthread_cond_t sync_cond;
+#else
 	sem_t sync_sem;
+#endif
+
 
 	struct tag_s *tag;
 

+ 16 - 4
src/core/workers.c

@@ -229,7 +229,8 @@ static void init_workers(struct machine_config_s *config)
 	{
 		struct worker_s *workerarg = &config->workers[worker];
 
-		sem_init(&workerarg->ready_sem, 0, 0);
+		pthread_mutex_init(&workerarg->mutex, NULL);
+		pthread_cond_init(&workerarg->ready_cond, NULL);
 
 		/* if some codelet's termination cannot be handled directly :
 		 * for instance in the Gordon driver, Gordon tasks' callbacks
@@ -244,7 +245,11 @@ static void init_workers(struct machine_config_s *config)
 				workerarg->set = NULL;
 				pthread_create(&workerarg->worker_thread, 
 						NULL, core_worker, workerarg);
-				sem_wait(&workerarg->ready_sem);
+
+				pthread_mutex_lock(&workerarg->mutex);
+				pthread_cond_wait(&workerarg->ready_cond, &workerarg->mutex);
+				pthread_mutex_unlock(&workerarg->mutex);
+
 				break;
 #endif
 #ifdef USE_CUDA
@@ -252,7 +257,11 @@ static void init_workers(struct machine_config_s *config)
 				workerarg->set = NULL;
 				pthread_create(&workerarg->worker_thread, 
 						NULL, cuda_worker, workerarg);
-				sem_wait(&workerarg->ready_sem);
+
+				pthread_mutex_lock(&workerarg->mutex);
+				pthread_cond_wait(&workerarg->ready_cond, &workerarg->mutex);
+				pthread_mutex_unlock(&workerarg->mutex);
+
 				break;
 #endif
 #ifdef USE_GORDON
@@ -266,7 +275,10 @@ static void init_workers(struct machine_config_s *config)
 
 					pthread_create(&gordon_worker_set.worker_thread, NULL, 
 							gordon_worker, &gordon_worker_set);
-					sem_wait(&gordon_worker_set.ready_sem);
+
+					pthread_mutex_lock(&gordon_worker_set.mutex);
+					pthread_cond_wait(&gordon_worker_set.ready_cond, &gordon_worker_set.mutex);
+					pthread_mutex_unlock(&gordon_worker_set.mutex);
 
 					gordon_inited = 1;
 				}

+ 4 - 2
src/core/workers.h

@@ -63,12 +63,13 @@ enum archtype {
 };
 
 struct worker_s {
+        pthread_mutex_t mutex;
 	enum archtype arch; /* what is the type of worker ? */
 	enum starpu_perf_archtype perf_arch; /* in case there are different models of the same arch */
 	pthread_t worker_thread; /* the thread which runs the worker */
 	int id; /* which core/gpu/etc is controlled by the workker ? */
-        sem_t ready_sem; /* indicate when the worker is ready */
 	int bindid; /* which core is the driver bound to ? */
+        pthread_cond_t ready_cond; /* indicate when the worker is ready */
 	unsigned memory_node; /* which memory node is associated that worker to ? */
 	struct jobq_s *jobq; /* in which queue will that worker get/put tasks ? */
 	struct worker_set_s *set; /* in case this worker belongs to a set */
@@ -79,12 +80,13 @@ struct worker_s {
 /* in case a single CPU worker may control multiple 
  * accelerators (eg. Gordon for n SPUs) */
 struct worker_set_s {
+        pthread_mutex_t mutex;
 	pthread_t worker_thread; /* the thread which runs the worker */
 	unsigned nworkers;
 	unsigned joined; /* only one thread may call pthread_join*/
 	void *retval;
 	struct worker_s *workers;
-        sem_t ready_sem; /* indicate when the worker is ready */
+        pthread_cond_t ready_cond; /* indicate when the set is ready */
 };
 
 struct machine_config_s {

+ 2 - 1
src/datawizard/data_request.c

@@ -102,7 +102,8 @@ void handle_node_data_requests(uint32_t src_node)
 		r->retval = driver_copy_data_1_to_1(r->state, r->src_node, r->dst_node, 0);
 		
 		/* wake the requesting worker up */
-		sem_post(&r->sem);
+		if (sem_post(&r->sem))
+			perror("sem_post");
 
 		take_mutex(&data_requests_mutex[src_node]);
 	}

+ 3 - 1
src/drivers/core/driver_core.c

@@ -113,7 +113,9 @@ void *core_worker(void *arg)
 	core_arg->jobq->total_communication_time = 0.0;
 	
         /* tell the main thread that we are ready */
-        sem_post(&core_arg->ready_sem);
+	pthread_mutex_lock(&core_arg->mutex);
+	pthread_cond_signal(&core_arg->ready_cond);
+	pthread_mutex_unlock(&core_arg->mutex);
 
         job_t j;
 	int res;

+ 3 - 1
src/drivers/cuda/driver_cuda.c

@@ -342,7 +342,9 @@ void *cuda_worker(void *arg)
 	
 
 	/* tell the main thread that this one is ready */
-	sem_post(&args->ready_sem);
+	pthread_mutex_lock(&args->mutex);
+	pthread_cond_signal(&args->ready_cond);
+	pthread_mutex_unlock(&args->mutex);
 
 	struct job_s * j;
 	int res;

+ 16 - 5
src/drivers/gordon/driver_gordon.c

@@ -23,7 +23,10 @@
 #include <core/policies/sched_policy.h>
 
 pthread_t progress_thread;
-sem_t progress_sem;
+
+pthread_cond_t progress_cond;
+pthread_mutex_t progress_mutex;
+
 struct starpu_mutex_t terminated_list_mutexes[32]; 
 
 struct gordon_task_wrapper_s {
@@ -54,7 +57,9 @@ void *gordon_worker_progress(void *arg)
 	sched_setaffinity(0, sizeof(aff_mask), &aff_mask);
 #endif
 
-	sem_post(&progress_sem);
+	pthread_mutex_lock(&progress_mutex);
+	pthread_cond_signal(&progress_cond);
+	pthread_mutex_unlock(&progress_mutex);
 
 	while (1) {
 		/* the Gordon runtime needs to make sure that we poll it 
@@ -455,16 +460,22 @@ void *gordon_worker(void *arg)
 	 */
 
 	/* launch the progression thread */
-	sem_init(&progress_sem, 0, 0);
+	pthread_mutex_init(&progress_mutex, NULL);
+	pthread_cond_init(&progress_cond, NULL);
+	
 	pthread_create(&progress_thread, NULL, gordon_worker_progress, gordon_set_arg);
 
 	/* wait for the progression thread to be ready */
-	sem_wait(&progress_sem);
+	pthread_mutex_lock(&progress_mutex);
+	pthread_cond_wait(&progress_cond, &progress_mutex);
+	pthread_mutex_unlock(&progress_mutex);
 
 	fprintf(stderr, "progress thread is running ... \n");
 	
 	/* tell the core that gordon is ready */
-	sem_post(&gordon_set_arg->ready_sem);
+	pthread_mutex_lock(&gordon_set_arg->mutex);
+	pthread_cond_signal(&gordon_set_arg->ready_cond);
+	pthread_mutex_unlock(&gordon_set_arg->mutex);
 
 	gordon_worker_inject(gordon_set_arg);