瀏覽代碼

Merge remote-tracking branch 'origin/master' into ft_checkpoint

Romain LION 5 年之前
父節點
當前提交
de205a34f9

+ 7 - 1
include/starpu_data.h

@@ -161,7 +161,13 @@ void starpu_data_unregister_no_coherency(starpu_data_handle_t handle);
 
 /**
    Destroy the data \p handle once it is no longer needed by any
-   submitted task. No coherency is assumed.
+   submitted task. No coherency is provided.
+
+   It is not safe to call starpu_data_unregister_submit() on a handle that
+   comes from the registration of a non-NULL application home buffer, since the
+   moment when the unregistration will happen is unknown to the
+   application. Only calling starpu_shutdown() guarantees that the data
+   was really unregistered.
 */
 void starpu_data_unregister_submit(starpu_data_handle_t handle);
 

+ 0 - 1
mpi/src/mpi/starpu_mpi_early_data.c

@@ -86,7 +86,6 @@ struct _starpu_mpi_early_data_handle *_starpu_mpi_early_data_create(struct _star
 	_STARPU_MPI_CALLOC(early_data_handle, 1, sizeof(struct _starpu_mpi_early_data_handle));
 	STARPU_PTHREAD_MUTEX_INIT(&early_data_handle->req_mutex, NULL);
 	STARPU_PTHREAD_COND_INIT(&early_data_handle->req_cond, NULL);
-	early_data_handle->env = envelope;
 	early_data_handle->node_tag.node.comm = comm;
 	early_data_handle->node_tag.node.rank = source;
 	early_data_handle->node_tag.data_tag = envelope->data_tag;

+ 3 - 1
mpi/src/mpi/starpu_mpi_early_data.h

@@ -34,9 +34,9 @@ extern "C"
 
 LIST_TYPE(_starpu_mpi_early_data_handle,
 	  starpu_data_handle_t handle;
-	  struct _starpu_mpi_envelope *env;
 	  struct _starpu_mpi_req *req;
 	  void *buffer;
+	  size_t size;
 	  int req_ready;
 	  struct _starpu_mpi_node_tag node_tag;
 	  starpu_pthread_mutex_t req_mutex;
@@ -50,6 +50,8 @@ struct _starpu_mpi_early_data_handle_tag_hashlist
 	starpu_mpi_tag_t data_tag;
 };
 
+struct _starpu_mpi_envelope;
+
 void _starpu_mpi_early_data_init(void);
 void _starpu_mpi_early_data_check_termination(void);
 void _starpu_mpi_early_data_shutdown(void);

+ 11 - 3
mpi/src/mpi/starpu_mpi_mpi.c

@@ -135,6 +135,7 @@ struct _starpu_mpi_early_data_cb_args
 	starpu_data_handle_t early_handle;
 	struct _starpu_mpi_req *req;
 	void *buffer;
+	size_t size;
 };
 
 void _starpu_mpi_submit_ready_request_inc(struct _starpu_mpi_req *req)
@@ -251,6 +252,7 @@ void _starpu_mpi_submit_ready_request(void *arg)
 				cb_args->data_handle = req->data_handle;
 				cb_args->early_handle = early_data_handle->handle;
 				cb_args->buffer = early_data_handle->buffer;
+				cb_args->size = early_data_handle->size;
 				cb_args->req = req;
 
 				_STARPU_MPI_DEBUG(3, "Calling data_acquire_cb on starpu_mpi_copy_cb..\n");
@@ -925,6 +927,8 @@ static void _starpu_mpi_early_data_cb(void* arg)
 			int position=0;
 			void *ptr = starpu_data_get_local_ptr(args->data_handle);
 			MPI_Unpack(args->buffer, itf_src->get_size(args->early_handle), &position, ptr, 1, datatype, args->req->node_tag.node.comm);
+			starpu_free_on_node_flags(STARPU_MAIN_RAM, (uintptr_t) args->buffer, args->size, 0);
+			args->buffer = NULL;
 			_starpu_mpi_datatype_free(args->data_handle, &datatype);
 		}
 		else
@@ -956,6 +960,9 @@ static void _starpu_mpi_early_data_cb(void* arg)
 	starpu_data_release(args->early_handle);
 
 	_STARPU_MPI_DEBUG(3, "Done, handling unregister of early_handle..\n");
+	/* XXX: note that we have already freed the registered buffer above. In
+	 * principle that's unsafe. As of now it is fine because StarPU has no
+	 * reason to access it. */
 	starpu_data_unregister_submit(args->early_handle);
 
 	_STARPU_MPI_DEBUG(3, "Done, handling request %p termination of the already received request\n",args->req);
@@ -1145,9 +1152,10 @@ static void _starpu_mpi_receive_early_data(struct _starpu_mpi_envelope *envelope
 		 * we are going to receive the data as raw memory, and give it
 		 * to the application when it posts a receive for this tag
 		 */
-		_STARPU_MPI_DEBUG(3, "Posting a receive for a data of size %d which has not yet been registered\n", (int)early_data_handle->env->size);
-		early_data_handle->buffer = (void *)starpu_malloc_on_node_flags(STARPU_MAIN_RAM, early_data_handle->env->size, 0);
-		starpu_variable_data_register(&early_data_handle->handle, STARPU_MAIN_RAM, (uintptr_t) early_data_handle->buffer, early_data_handle->env->size);
+		_STARPU_MPI_DEBUG(3, "Posting a receive for a data of size %d which has not yet been registered\n", (int)envelope->size);
+		early_data_handle->buffer = (void *)starpu_malloc_on_node_flags(STARPU_MAIN_RAM, envelope->size, 0);
+		early_data_handle->size = envelope->size;
+		starpu_variable_data_register(&early_data_handle->handle, STARPU_MAIN_RAM, (uintptr_t) early_data_handle->buffer, envelope->size);
 		//_starpu_mpi_early_data_add(early_data_handle);
 	}
 

+ 2 - 0
tests/datawizard/lazy_unregister.c

@@ -67,6 +67,8 @@ int main(void)
 	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 
 	starpu_data_unregister_submit(handle);
+	/* Note: we have no way to know when this will happen. We have to wait
+	 * for starpu_shutdown before being able to free the registered buffer */
 
 	ret = starpu_task_submit(t1);
 	if (ret == -ENODEV)