Prechádzať zdrojové kódy

encompass a larger scope for transient pop sched op protection

Olivier Aumage 8 rokov pred
rodič
commit
e47d845df0

+ 2 - 2
src/core/sched_ctx.c

@@ -1160,14 +1160,14 @@ void starpu_sched_ctx_delete(unsigned sched_ctx_id)
 		{
 			struct _starpu_worker *worker = _starpu_get_worker_struct(backup_workerids[i]);
 			STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex);
-			if (worker->state_pop_pending)
+			if (worker->state_sched_op_pending)
 			{
 				worker->state_changing_ctx_waiting = 1;
 				do
 				{
 					STARPU_PTHREAD_COND_WAIT(&worker->sched_cond, &worker->sched_mutex);
 				}
-				while (worker->state_pop_pending);
+				while (worker->state_sched_op_pending);
 				worker->state_changing_ctx_waiting = 0;
 			}
 		}

+ 0 - 6
src/core/sched_policy.c

@@ -850,7 +850,6 @@ pick:
 			{
 				if (sched_ctx->sched_policy && sched_ctx->sched_policy->pop_task)
 				{
-					worker->state_pop_pending = 1;
 					/* Note: we do not push the scheduling state here, because
 					 * otherwise when a worker is idle, we'd keep
 					 * pushing/popping a scheduling state here, while what we
@@ -859,11 +858,6 @@ pick:
 					if (task)
 						_STARPU_TASK_BREAK_ON(task, pop);
 					_starpu_pop_task_end(task);
-					worker->state_pop_pending = 0;
-					if (worker->state_changing_ctx_waiting)
-						/* cond_broadcast is required over cond_signal since
-						 * the condition is share for multiple purpose */
-						STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
 				}
 			}
 

+ 23 - 1
src/core/workers.h

@@ -82,7 +82,7 @@ LIST_TYPE(_starpu_worker,
 	unsigned memory_node; /* which memory node is the worker associated with ? */
 	starpu_pthread_cond_t sched_cond; /* condition variable used when the worker waits for tasks. */
         starpu_pthread_mutex_t sched_mutex; /* mutex protecting sched_cond */
-	int state_pop_pending:1; /* a task pop is ongoing even though sched_mutex may temporarily be unlocked */
+	int state_sched_op_pending:1; /* a task pop is ongoing even though sched_mutex may temporarily be unlocked */
 	int state_changing_ctx_waiting:1; /* a thread is waiting for transient operations such as pop to complete before acquiring sched_mutex and modifying the worker ctx*/
 	struct starpu_task_list local_tasks; /* this queue contains tasks that have been explicitely submitted to that queue */
 	struct starpu_task **local_ordered_tasks; /* this queue contains tasks that have been explicitely submitted to that queue with an explicit order */
@@ -622,4 +622,26 @@ void _starpu_worker_set_stream_ctx(unsigned workerid, struct _starpu_sched_ctx *
 
 struct _starpu_sched_ctx* _starpu_worker_get_ctx_stream(unsigned stream_workerid);
 
+/* Must be called with worker's sched_mutex held.
+ * Mark the beginning of a scheduling operation during which the sched_mutex
+ * lock may be temporarily released, but the scheduling context of the worker
+ * should not be modified */
+static inline void _starpu_worker_enter_transient_sched_op(struct _starpu_worker * const worker)
+{
+	worker->state_sched_op_pending = 1;
+}
+
+/* Must be called with worker's sched_mutex held.
+ * Mark the end of a scheduling operation, and notify potential waiters that
+ * scheduling context changes can safely be performed again.
+ */
+static inline void  _starpu_worker_leave_transient_sched_op(struct _starpu_worker * const worker)
+{
+	worker->state_sched_op_pending = 0;
+	if (worker->state_changing_ctx_waiting)
+		/* cond_broadcast is required over cond_signal since
+		 * the condition is share for multiple purpose */
+		STARPU_PTHREAD_COND_BROADCAST(&worker->sched_cond);
+}
+
 #endif // __WORKERS_H__

+ 6 - 0
src/drivers/driver_common/driver_common.c

@@ -420,7 +420,11 @@ struct starpu_task *_starpu_get_worker_task(struct _starpu_worker *worker, int w
 		task = NULL;
 	/*else try to pop a task*/
 	else
+	{
+		_starpu_worker_enter_transient_sched_op(worker);
 		task = _starpu_pop_task(worker);
+		_starpu_worker_leave_transient_sched_op(worker);
+	}
 
 #if !defined(STARPU_SIMGRID)
 	if (task == NULL && !executing)
@@ -515,7 +519,9 @@ int _starpu_get_multi_worker_task(struct _starpu_worker *workers, struct starpu_
 #endif
 			_starpu_worker_set_status_scheduling(workers[i].workerid);
 			_starpu_set_local_worker_key(&workers[i]);
+			_starpu_worker_enter_transient_sched_op(&workers[i]);
 			tasks[i] = _starpu_pop_task(&workers[i]);
+			_starpu_worker_leave_transient_sched_op(&workers[i]);
 			if(tasks[i] != NULL)
 			{
 				_starpu_worker_set_status_scheduling_done(workers[i].workerid);