Browse Source

optimize not-busy signalization: only take the mutex if starpu_data_unregister is indeed waiting, and the handle is not busy

Samuel Thibault 14 years ago
parent
commit
291771f0e1

+ 4 - 8
src/core/dependencies/data_concurrency.c

@@ -253,11 +253,9 @@ void _starpu_notify_data_dependencies(starpu_data_handle handle)
 	/* A data access has finished so we remove a reference. */
 	STARPU_ASSERT(handle->refcnt > 0);
 	handle->refcnt--;
-	PTHREAD_MUTEX_LOCK(&handle->busy_mutex);
 	STARPU_ASSERT(handle->busy_count > 0);
-	if (!--handle->busy_count)
-		PTHREAD_COND_BROADCAST(&handle->busy_cond);
-	PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex);
+	handle->busy_count--;
+	_starpu_data_check_not_busy(handle);
 
 	/* The handle has been destroyed in between (eg. this was a temporary
 	 * handle created for a reduction.) */
@@ -343,11 +341,9 @@ void _starpu_notify_data_dependencies(starpu_data_handle handle)
 			starpu_data_requester_delete(r);
 			
 			_starpu_spin_lock(&handle->header_lock);
-			PTHREAD_MUTEX_LOCK(&handle->busy_mutex);
 			STARPU_ASSERT(handle->busy_count > 0);
-			if (!--handle->busy_count)
-				PTHREAD_COND_BROADCAST(&handle->busy_cond);
-			PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex);
+			handle->busy_count--;
+			_starpu_data_check_not_busy(handle);
 		}
 	}
 }

+ 2 - 4
src/datawizard/coherency.c

@@ -533,11 +533,9 @@ void _starpu_release_data_on_node(starpu_data_handle handle, uint32_t default_wt
 	replicate->refcnt--;
 	STARPU_ASSERT(replicate->refcnt >= 0);
 
-	PTHREAD_MUTEX_LOCK(&handle->busy_mutex);
 	STARPU_ASSERT(handle->busy_count > 0);
-	if (!--handle->busy_count)
-		PTHREAD_COND_BROADCAST(&handle->busy_cond);
-	PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex);
+	handle->busy_count--;
+	_starpu_data_check_not_busy(handle);
 
 	/* In case there was a temporary handle (eg. used for reduction), this
 	 * handle may have requested to be destroyed when the data is released

+ 4 - 2
src/datawizard/coherency.h

@@ -106,9 +106,11 @@ struct starpu_data_state_t {
 
 	/* Condition to make application wait for all transfers before freeing handle */
 	/* busy_count is the number of handle->refcnt, handle->per_node[*]->refcnt, and number of starpu_data_requesters */
-	/* Core code which releases busy_count has to broadcast busy_cond to
-	 * let starpu_data_unregister proceed */
+	/* Core code which releases busy_count has to call
+	 * _starpu_data_check_not_busy to let starpu_data_unregister proceed */
 	unsigned busy_count;
+	/* Is starpu_data_unregister waiting for busy_count? */
+	unsigned busy_waiting;
 	pthread_mutex_t busy_mutex;
 	pthread_cond_t busy_cond;
 

+ 4 - 9
src/datawizard/data_request.c

@@ -278,24 +278,19 @@ static void starpu_handle_data_request_completion(starpu_data_request_t r)
 	/* Remove a reference on the destination replicate  */
 	STARPU_ASSERT(dst_replicate->refcnt > 0);
 	dst_replicate->refcnt--;
+	STARPU_ASSERT(handle->busy_count > 0);
+	handle->busy_count--;
 
 	/* In case the source was "locked" by the request too */
 	if (mode & STARPU_R)
 	{
 		STARPU_ASSERT(src_replicate->refcnt > 0);
 		src_replicate->refcnt--;
-	}
-
-	PTHREAD_MUTEX_LOCK(&handle->busy_mutex);
-	STARPU_ASSERT(handle->busy_count > 0);
-	handle->busy_count--;
-	if (mode & STARPU_R) {
 		STARPU_ASSERT(handle->busy_count > 0);
 		handle->busy_count--;
 	}
-	if (!handle->busy_count)
-		PTHREAD_COND_BROADCAST(&handle->busy_cond);
-	PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex);
+
+	_starpu_data_check_not_busy(handle);
 
 	r->refcnt--;
 

+ 1 - 0
src/datawizard/filters.c

@@ -156,6 +156,7 @@ void starpu_data_partition(starpu_data_handle initial_handle, struct starpu_data
 		child->reduction_req_list = starpu_data_requester_list_new();
 		child->refcnt = 0;
 		child->busy_count = 0;
+		child->busy_waiting = 0;
 		PTHREAD_MUTEX_INIT(&child->busy_mutex, NULL);
 		PTHREAD_COND_INIT(&child->busy_cond, NULL);
 		child->reduction_refcnt = 0;

+ 18 - 0
src/datawizard/interfaces/data_interface.c

@@ -104,6 +104,7 @@ static void _starpu_register_new_data(starpu_data_handle handle,
 	handle->req_list = starpu_data_requester_list_new();
 	handle->refcnt = 0;
 	handle->busy_count = 0;
+	handle->busy_waiting = 0;
 	PTHREAD_MUTEX_INIT(&handle->busy_mutex, NULL);
 	PTHREAD_COND_INIT(&handle->busy_cond, NULL);
 	_starpu_spin_init(&handle->header_lock);
@@ -368,6 +369,18 @@ struct unregister_callback_arg {
 	pthread_cond_t cond;
 }; 
 
+/* Check whether we should tell starpu_data_unregister that the data handle is
+ * not busy any more.
+ * The header is supposed to be locked */
+void _starpu_data_check_not_busy(starpu_data_handle handle)
+{
+	if (!handle->busy_count && handle->busy_waiting) {
+		PTHREAD_MUTEX_LOCK(&handle->busy_mutex);
+		PTHREAD_COND_BROADCAST(&handle->busy_cond);
+		PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex);
+	}
+}
+
 static void _starpu_data_unregister_fetch_data_callback(void *_arg)
 {
 	int ret;
@@ -435,6 +448,11 @@ static void _starpu_data_unregister(starpu_data_handle handle, unsigned coherent
 			return;
 	}
 
+	_starpu_spin_lock(&handle->header_lock);
+	/* Tell holders of references that we're starting waiting */
+	handle->busy_waiting = 1;
+	_starpu_spin_unlock(&handle->header_lock);
+
 	/* Wait for all requests to finish (notably WT requests) */
 	PTHREAD_MUTEX_LOCK(&handle->busy_mutex);
 	while (handle->busy_count)

+ 1 - 0
src/datawizard/interfaces/data_interface.h

@@ -27,6 +27,7 @@ void _starpu_data_free_interfaces(starpu_data_handle handle)
 	STARPU_ATTRIBUTE_INTERNAL;
 
 extern void _starpu_data_interface_init(void) STARPU_ATTRIBUTE_INTERNAL;
+extern void _starpu_data_check_not_busy(starpu_data_handle handle) STARPU_ATTRIBUTE_INTERNAL;
 extern void _starpu_data_interface_shutdown(void) STARPU_ATTRIBUTE_INTERNAL;
 
 extern void _starpu_data_register_ram_pointer(starpu_data_handle handle,

+ 4 - 8
src/datawizard/memalloc.c

@@ -163,11 +163,9 @@ static void transfer_subtree_to_node(starpu_data_handle handle, unsigned src_nod
 
 			src_replicate->refcnt--;
 			dst_replicate->refcnt--;
-			PTHREAD_MUTEX_LOCK(&handle->busy_mutex);
 			STARPU_ASSERT(handle->busy_count >= 2);
-			if (!(handle->busy_count -= 2))
-				PTHREAD_COND_BROADCAST(&handle->busy_cond);
-			PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex);
+			handle->busy_count -= 2;
+			_starpu_data_check_not_busy(handle);
 
 			break;
 		case STARPU_SHARED:
@@ -803,11 +801,9 @@ static ssize_t _starpu_allocate_interface(starpu_data_handle handle, struct star
 
 			replicate->refcnt--;
 			STARPU_ASSERT(replicate->refcnt >= 0);
-			PTHREAD_MUTEX_LOCK(&handle->busy_mutex);
 			STARPU_ASSERT(handle->busy_count > 0);
-			if (!--handle->busy_count)
-				PTHREAD_COND_BROADCAST(&handle->busy_cond);
-			PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex);
+			handle->busy_count--;
+			_starpu_data_check_not_busy(handle);
 		}
 
 	} while((allocated_memory == -ENOMEM) && attempts++ < 2);

+ 2 - 4
src/datawizard/user_interactions.c

@@ -339,11 +339,9 @@ int _starpu_prefetch_data_on_node_with_mode(starpu_data_handle handle, unsigned
 		if (!async) {
 			replicate->refcnt--;
 			STARPU_ASSERT(replicate->refcnt >= 0);
-			PTHREAD_MUTEX_LOCK(&handle->busy_mutex);
 			STARPU_ASSERT(handle->busy_count > 0);
-			if (!--handle->busy_count)
-				PTHREAD_COND_BROADCAST(&handle->busy_cond);
-			PTHREAD_MUTEX_UNLOCK(&handle->busy_mutex);
+			handle->busy_count--;
+			_starpu_data_check_not_busy(handle);
 		}
 
 		_starpu_notify_data_dependencies(handle);