před 16 roky · af4aae92b2
--- a/src/core/jobs.c
+++ b/src/core/jobs.c
@@ -51,7 +51,15 @@ job_t __attribute__((malloc)) job_create(struct starpu_task *task)
 
				 	job->terminated = 0;
			
 
				 
			
 
				 	if (task->synchronous)
			
 
				-		sem_init(&job->sync_sem, 0, 0);
			
 
				+	{
			
 
				+#ifdef __APPLE__ && __MACH__
			
 
				+		pthread_mutex_init(&job->sync_mutex, NULL);
			
 
				+		pthread_cond_init(&job->sync_cond, NULL);
			
 
				+#else
			
 
				+		if (sem_init(&job->sync_sem, 0, 0))
			
 
				+			perror("sem_init");
			
 
				+#endif
			
 
				+	}
			
 
				 
			
 
				 	if (task->use_tag)
			
 
				 		tag_declare(task->tag_id, job);
			
@@ -94,8 +102,14 @@ void handle_job_termination(job_t j)
 
				 
			
 
				 	if (task->synchronous)
			
 
				 	{
			
 
				+#ifdef __APPLE__ && __MACH__
			
 
				+		pthread_mutex_lock(&j->sync_mutex);
			
 
				+		pthread_cond_signal(&j->sync_cond);
			
 
				+		pthread_mutex_unlock(&j->sync_mutex);
			
 
				+#else
			
 
				 		if (sem_post(&j->sync_sem))
			
 
				 			perror("sem_post");
			
 
				+#endif
			
 
				 
			
 
				 		/* as this is a synchronous task, we do not delete the job 
			
 
				 		   structure which contains the j->sync_sem: we only liberate
			
@@ -111,8 +125,14 @@ void handle_job_termination(job_t j)
 
				 static void block_sync_task(job_t j)
			
 
				 {
			
 
				 	{
			
 
				+#ifdef __APPLE__ && __MACH__
			
 
				+		pthread_mutex_lock(&j->sync_mutex);
			
 
				+		pthread_cond_wait(&j->sync_cond, &j->sync_mutex);
			
 
				+		pthread_mutex_unlock(&j->sync_mutex);
			
 
				+#else
			
 
				 		sem_wait(&j->sync_sem);
			
 
				 		sem_destroy(&j->sync_sem);
			
 
				+#endif
			
 
				 
			
 
				 		/* as this is a synchronous task, the liberation of the job
			
 
				 		   structure was deferred */
			
@@ -164,13 +184,6 @@ int starpu_submit_task(struct starpu_task *task)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-//int submit_prio_job(job_t j)
			
 
				-//{
			
 
				-//	j->priority = MAX_PRIO;
			
 
				-//	
			
 
				-//	return submit_job(j);
			
 
				-//}
			
 
				-
			
 
				 /* This function is supplied for convenience only, it is equivalent to setting
			
 
				  * the proper flag and submitting the task with submit_task.
			
 
				  * Note that this call is blocking, and will not make StarPU progress,
			
--- a/src/core/jobs.h
+++ b/src/core/jobs.h
@@ -55,7 +55,15 @@ typedef void (*callback)(void *);
 
				 LIST_TYPE(job,
			
 
				 	struct starpu_task *task;
			
 
				 
			
 
				+/* Mac OS X does not provide anonymous semaphores,
			
 
				+   so we use condition variable instead */
			
 
				+#ifdef __APPLE__ && __MACH__
			
 
				+	pthread_mutex_t sync_mutex;
			
 
				+	pthread_cond_t sync_cond;
			
 
				+#else
			
 
				 	sem_t sync_sem;
			
 
				+#endif
			
 
				+
			
 
				 
			
 
				 	struct tag_s *tag;
			
 
				 
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -229,7 +229,8 @@ static void init_workers(struct machine_config_s *config)
 
				 	{
			
 
				 		struct worker_s *workerarg = &config->workers[worker];
			
 
				 
			
 
				-		sem_init(&workerarg->ready_sem, 0, 0);
			
 
				+		pthread_mutex_init(&workerarg->mutex, NULL);
			
 
				+		pthread_cond_init(&workerarg->ready_cond, NULL);
			
 
				 
			
 
				 		/* if some codelet's termination cannot be handled directly :
			
 
				 		 * for instance in the Gordon driver, Gordon tasks' callbacks
			
@@ -244,7 +245,11 @@ static void init_workers(struct machine_config_s *config)
 
				 				workerarg->set = NULL;
			
 
				 				pthread_create(&workerarg->worker_thread, 
			
 
				 						NULL, core_worker, workerarg);
			
 
				-				sem_wait(&workerarg->ready_sem);
			
 
				+
			
 
				+				pthread_mutex_lock(&workerarg->mutex);
			
 
				+				pthread_cond_wait(&workerarg->ready_cond, &workerarg->mutex);
			
 
				+				pthread_mutex_unlock(&workerarg->mutex);
			
 
				+
			
 
				 				break;
			
 
				 #endif
			
 
				 #ifdef USE_CUDA
			
@@ -252,7 +257,11 @@ static void init_workers(struct machine_config_s *config)
 
				 				workerarg->set = NULL;
			
 
				 				pthread_create(&workerarg->worker_thread, 
			
 
				 						NULL, cuda_worker, workerarg);
			
 
				-				sem_wait(&workerarg->ready_sem);
			
 
				+
			
 
				+				pthread_mutex_lock(&workerarg->mutex);
			
 
				+				pthread_cond_wait(&workerarg->ready_cond, &workerarg->mutex);
			
 
				+				pthread_mutex_unlock(&workerarg->mutex);
			
 
				+
			
 
				 				break;
			
 
				 #endif
			
 
				 #ifdef USE_GORDON
			
@@ -266,7 +275,10 @@ static void init_workers(struct machine_config_s *config)
 
				 
			
 
				 					pthread_create(&gordon_worker_set.worker_thread, NULL, 
			
 
				 							gordon_worker, &gordon_worker_set);
			
 
				-					sem_wait(&gordon_worker_set.ready_sem);
			
 
				+
			
 
				+					pthread_mutex_lock(&gordon_worker_set.mutex);
			
 
				+					pthread_cond_wait(&gordon_worker_set.ready_cond, &gordon_worker_set.mutex);
			
 
				+					pthread_mutex_unlock(&gordon_worker_set.mutex);
			
 
				 
			
 
				 					gordon_inited = 1;
			
 
				 				}
			
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -63,12 +63,13 @@ enum archtype {
 
				 };
			
 
				 
			
 
				 struct worker_s {
			
 
				+        pthread_mutex_t mutex;
			
 
				 	enum archtype arch; /* what is the type of worker ? */
			
 
				 	enum starpu_perf_archtype perf_arch; /* in case there are different models of the same arch */
			
 
				 	pthread_t worker_thread; /* the thread which runs the worker */
			
 
				 	int id; /* which core/gpu/etc is controlled by the workker ? */
			
 
				-        sem_t ready_sem; /* indicate when the worker is ready */
			
 
				 	int bindid; /* which core is the driver bound to ? */
			
 
				+        pthread_cond_t ready_cond; /* indicate when the worker is ready */
			
 
				 	unsigned memory_node; /* which memory node is associated that worker to ? */
			
 
				 	struct jobq_s *jobq; /* in which queue will that worker get/put tasks ? */
			
 
				 	struct worker_set_s *set; /* in case this worker belongs to a set */
			
@@ -79,12 +80,13 @@ struct worker_s {
 
				 /* in case a single CPU worker may control multiple 
			
 
				  * accelerators (eg. Gordon for n SPUs) */
			
 
				 struct worker_set_s {
			
 
				+        pthread_mutex_t mutex;
			
 
				 	pthread_t worker_thread; /* the thread which runs the worker */
			
 
				 	unsigned nworkers;
			
 
				 	unsigned joined; /* only one thread may call pthread_join*/
			
 
				 	void *retval;
			
 
				 	struct worker_s *workers;
			
 
				-        sem_t ready_sem; /* indicate when the worker is ready */
			
 
				+        pthread_cond_t ready_cond; /* indicate when the set is ready */
			
 
				 };
			
 
				 
			
 
				 struct machine_config_s {
			
--- a/src/datawizard/data_request.c
+++ b/src/datawizard/data_request.c
@@ -102,7 +102,8 @@ void handle_node_data_requests(uint32_t src_node)
 
				 		r->retval = driver_copy_data_1_to_1(r->state, r->src_node, r->dst_node, 0);
			
 
				 		
			
 
				 		/* wake the requesting worker up */
			
 
				-		sem_post(&r->sem);
			
 
				+		if (sem_post(&r->sem))
			
 
				+			perror("sem_post");
			
 
				 
			
 
				 		take_mutex(&data_requests_mutex[src_node]);
			
 
				 	}
			
--- a/src/drivers/core/driver_core.c
+++ b/src/drivers/core/driver_core.c
@@ -113,7 +113,9 @@ void *core_worker(void *arg)
 
				 	core_arg->jobq->total_communication_time = 0.0;
			
 
				 	
			
 
				         /* tell the main thread that we are ready */
			
 
				-        sem_post(&core_arg->ready_sem);
			
 
				+	pthread_mutex_lock(&core_arg->mutex);
			
 
				+	pthread_cond_signal(&core_arg->ready_cond);
			
 
				+	pthread_mutex_unlock(&core_arg->mutex);
			
 
				 
			
 
				         job_t j;
			
 
				 	int res;
			
--- a/src/drivers/cuda/driver_cuda.c
+++ b/src/drivers/cuda/driver_cuda.c
@@ -342,7 +342,9 @@ void *cuda_worker(void *arg)
 
				 	
			
 
				 
			
 
				 	/* tell the main thread that this one is ready */
			
 
				-	sem_post(&args->ready_sem);
			
 
				+	pthread_mutex_lock(&args->mutex);
			
 
				+	pthread_cond_signal(&args->ready_cond);
			
 
				+	pthread_mutex_unlock(&args->mutex);
			
 
				 
			
 
				 	struct job_s * j;
			
 
				 	int res;
			
--- a/src/drivers/gordon/driver_gordon.c
+++ b/src/drivers/gordon/driver_gordon.c
@@ -23,7 +23,10 @@
 
				 #include <core/policies/sched_policy.h>
			
 
				 
			
 
				 pthread_t progress_thread;
			
 
				-sem_t progress_sem;
			
 
				+
			
 
				+pthread_cond_t progress_cond;
			
 
				+pthread_mutex_t progress_mutex;
			
 
				+
			
 
				 struct starpu_mutex_t terminated_list_mutexes[32]; 
			
 
				 
			
 
				 struct gordon_task_wrapper_s {
			
@@ -54,7 +57,9 @@ void *gordon_worker_progress(void *arg)
 
				 	sched_setaffinity(0, sizeof(aff_mask), &aff_mask);
			
 
				 #endif
			
 
				 
			
 
				-	sem_post(&progress_sem);
			
 
				+	pthread_mutex_lock(&progress_mutex);
			
 
				+	pthread_cond_signal(&progress_cond);
			
 
				+	pthread_mutex_unlock(&progress_mutex);
			
 
				 
			
 
				 	while (1) {
			
 
				 		/* the Gordon runtime needs to make sure that we poll it 
			
@@ -455,16 +460,22 @@ void *gordon_worker(void *arg)
 
				 	 */
			
 
				 
			
 
				 	/* launch the progression thread */
			
 
				-	sem_init(&progress_sem, 0, 0);
			
 
				+	pthread_mutex_init(&progress_mutex, NULL);
			
 
				+	pthread_cond_init(&progress_cond, NULL);
			
 
				+	
			
 
				 	pthread_create(&progress_thread, NULL, gordon_worker_progress, gordon_set_arg);
			
 
				 
			
 
				 	/* wait for the progression thread to be ready */
			
 
				-	sem_wait(&progress_sem);
			
 
				+	pthread_mutex_lock(&progress_mutex);
			
 
				+	pthread_cond_wait(&progress_cond, &progress_mutex);
			
 
				+	pthread_mutex_unlock(&progress_mutex);
			
 
				 
			
 
				 	fprintf(stderr, "progress thread is running ... \n");
			
 
				 	
			
 
				 	/* tell the core that gordon is ready */
			
 
				-	sem_post(&gordon_set_arg->ready_sem);
			
 
				+	pthread_mutex_lock(&gordon_set_arg->mutex);
			
 
				+	pthread_cond_signal(&gordon_set_arg->ready_cond);
			
 
				+	pthread_mutex_unlock(&gordon_set_arg->mutex);
			
 
				 
			
 
				 	gordon_worker_inject(gordon_set_arg);