Explorar el Código

mpi/src: add explicit messages on assert failures

Nathalie Furmento hace 12 años
padre
commit
fecd363dd9

+ 32 - 32
mpi/src/starpu_mpi.c

@@ -72,7 +72,7 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_irecv_common(starpu_data_handle
 
 	_STARPU_MPI_LOG_IN();
 	struct _starpu_mpi_req *req = calloc(1, sizeof(struct _starpu_mpi_req));
-	STARPU_ASSERT(req);
+	STARPU_ASSERT_MSG(req, "Invalid request");
 
 	_STARPU_MPI_INC_POSTED_REQUESTS(1);
 
@@ -115,7 +115,7 @@ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	STARPU_ASSERT(req->ptr);
+	STARPU_ASSERT_MSG(req->ptr, "Pointer containing data to send is invalid");
 
 	_STARPU_MPI_DEBUG(2, "post MPI isend request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d user_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->mpi_tag, req->srcdst, req->data_handle, req->ptr, _starpu_mpi_datatype(req->datatype), (int)req->count, req->user_datatype);
 
@@ -124,7 +124,7 @@ static void _starpu_mpi_isend_data_func(struct _starpu_mpi_req *req)
 	TRACE_MPI_ISEND_SUBMIT_BEGIN(req->srcdst, req->mpi_tag, 0);
 
 	req->ret = MPI_Isend(req->ptr, req->count, req->datatype, req->srcdst, req->mpi_tag, req->comm, &req->request);
-	STARPU_ASSERT(req->ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Isend returning %d", req->ret);
 
 	TRACE_MPI_ISEND_SUBMIT_END(req->srcdst, req->mpi_tag, 0);
 
@@ -161,7 +161,7 @@ static void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
 			_STARPU_MPI_DEBUG(1, "Sending size %ld (%ld %s) with tag %d to node %d (first call to pack)\n", psize, sizeof(req->count), _starpu_mpi_datatype(MPI_BYTE), req->mpi_tag, req->srcdst);
 			req->count = psize;
 			ret = MPI_Isend(&req->count, sizeof(req->count), MPI_BYTE, req->srcdst, req->mpi_tag, req->comm, &req->size_req);
-			STARPU_ASSERT(ret == MPI_SUCCESS);
+			STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "when sending size, MPI_Isend returning %d", ret);
 		}
 
 		// Pack the data
@@ -171,12 +171,12 @@ static void _starpu_mpi_isend_size_func(struct _starpu_mpi_req *req)
 			// We know the size now, let's send it
 			_STARPU_MPI_DEBUG(1, "Sending size %ld (%ld %s) with tag %d to node %d (second call to pack)\n", req->count, sizeof(req->count), _starpu_mpi_datatype(MPI_BYTE), req->mpi_tag, req->srcdst);
 			ret = MPI_Isend(&req->count, sizeof(req->count), MPI_BYTE, req->srcdst, req->mpi_tag, req->comm, &req->size_req);
-			STARPU_ASSERT(ret == MPI_SUCCESS);
+			STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "when sending size, MPI_Isend returning %d", ret);
 		}
 		else
 		{
 			// We check the size returned with the 2 calls to pack is the same
-			STARPU_ASSERT(req->count == psize);
+			STARPU_ASSERT_MSG(req->count == psize, "Calls to pack_data returned different sizes %ld != %ld", req->count, psize);
 		}
 
 		// We can send the data now
@@ -194,12 +194,12 @@ static struct _starpu_mpi_req *_starpu_mpi_isend_common(starpu_data_handle_t dat
 int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int dest, int mpi_tag, MPI_Comm comm)
 {
 	_STARPU_MPI_LOG_IN();
-	STARPU_ASSERT(public_req);
+	STARPU_ASSERT_MSG(public_req, "starpu_mpi_isend needs a valid starpu_mpi_req");
 
 	struct _starpu_mpi_req *req;
 	req = _starpu_mpi_isend_common(data_handle, dest, mpi_tag, comm, 0, NULL, NULL);
 
-	STARPU_ASSERT(req);
+	STARPU_ASSERT_MSG(req, "Invalid return for _starpu_mpi_isend_common");
 	*public_req = req;
 
 	_STARPU_MPI_LOG_OUT();
@@ -241,14 +241,14 @@ static void _starpu_mpi_irecv_data_func(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
 
-	STARPU_ASSERT(req->ptr);
+	STARPU_ASSERT_MSG(req->ptr, "Invalid pointer to receive data");
 
 	_STARPU_MPI_DEBUG(2, "post MPI irecv request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d user_datatype %d \n", req, _starpu_mpi_request_type(req->request_type), req->mpi_tag, req->srcdst, req->data_handle, req->ptr, _starpu_mpi_datatype(req->datatype), (int)req->count, req->user_datatype);
 
 	TRACE_MPI_IRECV_SUBMIT_BEGIN(req->srcdst, req->mpi_tag);
 
 	req->ret = MPI_Irecv(req->ptr, req->count, req->datatype, req->srcdst, req->mpi_tag, req->comm, &req->request);
-	STARPU_ASSERT(req->ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_IRecv returning %d", req->ret);
 
 	TRACE_MPI_IRECV_SUBMIT_END(req->srcdst, req->mpi_tag);
 
@@ -275,7 +275,7 @@ static void _starpu_mpi_irecv_size_callback(void *arg)
 
 	starpu_data_unregister(callback->handle);
 	callback->req->ptr = malloc(callback->req->count);
-	STARPU_ASSERT_MSG(callback->req->ptr, "cannot allocate message of size %ld\n", callback->req->count);
+	STARPU_ASSERT_MSG(callback->req->ptr, "cannot allocate message of size %ld", callback->req->count);
 	_starpu_mpi_irecv_data_func(callback->req);
 	free(callback);
 }
@@ -310,12 +310,12 @@ static struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t dat
 int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *public_req, int source, int mpi_tag, MPI_Comm comm)
 {
 	_STARPU_MPI_LOG_IN();
-	STARPU_ASSERT(public_req);
+	STARPU_ASSERT_MSG(public_req, "starpu_mpi_irecv needs a valid starpu_mpi_req");
 
 	struct _starpu_mpi_req *req;
 	req = _starpu_mpi_irecv_common(data_handle, source, mpi_tag, comm, 0, NULL, NULL);
 
-	STARPU_ASSERT(req);
+	STARPU_ASSERT_MSG(req, "Invalid return for _starpu_mpi_irecv_common");
 	*public_req = req;
 
 	_STARPU_MPI_LOG_OUT();
@@ -390,7 +390,7 @@ static void _starpu_mpi_wait_func(struct _starpu_mpi_req *waiting_req)
 	TRACE_MPI_UWAIT_BEGIN(req->srcdst, req->mpi_tag);
 
 	req->ret = MPI_Wait(&req->request, waiting_req->status);
-	STARPU_ASSERT(req->ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Wait returning %d", req->ret);
 
 	TRACE_MPI_UWAIT_END(req->srcdst, req->mpi_tag);
 
@@ -403,7 +403,7 @@ int starpu_mpi_wait(starpu_mpi_req *public_req, MPI_Status *status)
 	_STARPU_MPI_LOG_IN();
 	int ret;
 	struct _starpu_mpi_req *waiting_req = calloc(1, sizeof(struct _starpu_mpi_req));
-	STARPU_ASSERT(waiting_req);
+	STARPU_ASSERT_MSG(waiting_req, "Allocation failed");
 	struct _starpu_mpi_req *req = *public_req;
 
 	_STARPU_MPI_INC_POSTED_REQUESTS(1);
@@ -460,7 +460,7 @@ static void _starpu_mpi_test_func(struct _starpu_mpi_req *testing_req)
 	TRACE_MPI_UTESTING_BEGIN(req->srcdst, req->mpi_tag);
 
 	req->ret = MPI_Test(&req->request, testing_req->flag, testing_req->status);
-	STARPU_ASSERT(req->ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Test returning %d", req->ret);
 
 	TRACE_MPI_UTESTING_END(req->srcdst, req->mpi_tag);
 
@@ -482,11 +482,11 @@ int starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 	_STARPU_MPI_LOG_IN();
 	int ret = 0;
 
-	STARPU_ASSERT(public_req);
+	STARPU_ASSERT_MSG(public_req, "starpu_mpi_test needs a valid starpu_mpi_req");
 
 	struct _starpu_mpi_req *req = *public_req;
 
-	STARPU_ASSERT(!req->detached);
+	STARPU_ASSERT_MSG(!req->detached, "MPI_Test cannot be called on a detached request");
 
 	_STARPU_PTHREAD_MUTEX_LOCK(&req->req_mutex);
 	unsigned submitted = req->submitted;
@@ -495,7 +495,7 @@ int starpu_mpi_test(starpu_mpi_req *public_req, int *flag, MPI_Status *status)
 	if (submitted)
 	{
 		struct _starpu_mpi_req *testing_req = calloc(1, sizeof(struct _starpu_mpi_req));
-		STARPU_ASSERT(testing_req);
+		STARPU_ASSERT_MSG(testing_req, "allocation failed");
 		//		memset(testing_req, 0, sizeof(struct _starpu_mpi_req));
 
 		/* Initialize the request structure */
@@ -550,7 +550,7 @@ static void _starpu_mpi_barrier_func(struct _starpu_mpi_req *barrier_req)
 	_STARPU_MPI_LOG_IN();
 
 	barrier_req->ret = MPI_Barrier(barrier_req->comm);
-	STARPU_ASSERT(barrier_req->ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(barrier_req->ret == MPI_SUCCESS, "MPI_Barrier returning %d", barrier_req->ret);
 
 	_starpu_mpi_handle_request_termination(barrier_req);
 	_STARPU_MPI_LOG_OUT();
@@ -561,7 +561,7 @@ int starpu_mpi_barrier(MPI_Comm comm)
 	_STARPU_MPI_LOG_IN();
 	int ret;
 	struct _starpu_mpi_req *barrier_req = calloc(1, sizeof(struct _starpu_mpi_req));
-	STARPU_ASSERT(barrier_req);
+	STARPU_ASSERT_MSG(barrier_req, "allocation failed");
 
 	/* First wait for *both* all tasks and MPI requests to finish, in case
 	 * some tasks generate MPI requests, MPI requests generate tasks, etc.
@@ -648,7 +648,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 		MPI_Status status;
 		memset(&status, 0, sizeof(MPI_Status));
 		req->ret = MPI_Recv(req->ptr, req->count, req->datatype, req->srcdst, req->mpi_tag, req->comm, &status);
-		STARPU_ASSERT(req->ret == MPI_SUCCESS);
+		STARPU_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Recv returning %d", req->ret);
 	}
 
 	if (req->request_type == RECV_REQ || req->request_type == SEND_REQ || req->request_type == PROBE_REQ)
@@ -661,8 +661,8 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 				MPI_Status status;
 				int flag;
 				ret = MPI_Test(&req->size_req, &flag, &status);
-				STARPU_ASSERT(ret == MPI_SUCCESS);
-				STARPU_ASSERT(flag);
+				STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Test returning %d", ret);
+				STARPU_ASSERT_MSG(flag, "MPI_Test returning flag %d", flag);
 			}
 			if (req->request_type == RECV_REQ)
 				// req->ptr is freed by starpu_handle_unpack_data
@@ -751,7 +751,7 @@ static void _starpu_mpi_test_detached_requests(void)
 			req->ret = MPI_Test(&req->request, &flag, &status);
 		}
 
-		STARPU_ASSERT(req->ret == MPI_SUCCESS);
+		STARPU_ASSERT_MSG(req->ret == MPI_SUCCESS, "MPI_Iprobe or MPI_Test returning %d", req->ret);
 
 		if (flag)
 		{
@@ -811,7 +811,7 @@ static void _starpu_mpi_handle_detached_request(struct _starpu_mpi_req *req)
 static void _starpu_mpi_handle_new_request(struct _starpu_mpi_req *req)
 {
 	_STARPU_MPI_LOG_IN();
-	STARPU_ASSERT(req);
+	STARPU_ASSERT_MSG(req, "Invalid request");
 
 	/* submit the request to MPI */
 	_STARPU_MPI_DEBUG(2, "Handling new request %p type %s tag %d src %d data %p ptr %p datatype '%s' count %d user_datatype %d \n",
@@ -933,9 +933,9 @@ static void *_starpu_mpi_progress_thread_func(void *arg)
 		}
 	}
 
-	STARPU_ASSERT(_starpu_mpi_req_list_empty(detached_requests));
-	STARPU_ASSERT(_starpu_mpi_req_list_empty(new_requests));
-	STARPU_ASSERT(posted_requests == 0);
+	STARPU_ASSERT_MSG(_starpu_mpi_req_list_empty(detached_requests), "List of detached requests not empty");
+	STARPU_ASSERT_MSG(_starpu_mpi_req_list_empty(new_requests), "List of new requests not empty");
+	STARPU_ASSERT_MSG(posted_requests == 0, "Number of posted request is not zero");
 
 	if (argc_argv->initialize_mpi)
 	{
@@ -970,7 +970,7 @@ static void _starpu_mpi_add_sync_point_in_fxt(void)
 	MPI_Comm_size(MPI_COMM_WORLD, &worldsize);
 
 	ret = MPI_Barrier(MPI_COMM_WORLD);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Barrier returning %d", ret);
 
 	/* We generate a "unique" key so that we can make sure that different
 	 * FxT traces come from the same MPI run. */
@@ -985,7 +985,7 @@ static void _starpu_mpi_add_sync_point_in_fxt(void)
 	}
 
 	ret = MPI_Bcast(&random_number, 1, MPI_INT, 0, MPI_COMM_WORLD);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Bcast returning %d", ret);
 
 	TRACE_MPI_BARRIER(rank, worldsize, random_number);
 
@@ -1020,7 +1020,7 @@ int _starpu_mpi_initialize(int *argc, char ***argv, int initialize_mpi)
 
 #ifdef STARPU_MPI_ACTIVITY
 	hookid = starpu_progression_hook_register(progression_hook_func, NULL);
-	STARPU_ASSERT(hookid >= 0);
+	STARPU_ASSERT_MSG(hookid >= 0, "starpu_progression_hook_register failed");
 #endif /* STARPU_MPI_ACTIVITY */
 
 	_starpu_mpi_add_sync_point_in_fxt();

+ 4 - 4
mpi/src/starpu_mpi_collective.c

@@ -65,7 +65,7 @@ int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, i
 			{
 				int owner = starpu_data_get_rank(data_handles[x]);
 				int mpi_tag = starpu_data_get_tag(data_handles[x]);
-				STARPU_ASSERT(mpi_tag >= 0);
+				STARPU_ASSERT_MSG(mpi_tag >= 0, "Invalid tag for data handle");
 				if ((rank == root) && (owner != root))
 				{
 					callback_arg->count ++;
@@ -84,7 +84,7 @@ int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, i
 		{
 			int owner = starpu_data_get_rank(data_handles[x]);
 			int mpi_tag = starpu_data_get_tag(data_handles[x]);
-			STARPU_ASSERT(mpi_tag >= 0);
+			STARPU_ASSERT_MSG(mpi_tag >= 0, "Invalid tag for data handle");
 			if ((rank == root) && (owner != root))
 			{
 				//fprintf(stderr, "[%d] Sending data[%d] to %d\n", rank, x, owner);
@@ -127,7 +127,7 @@ int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, in
 			{
 				int owner = starpu_data_get_rank(data_handles[x]);
 				int mpi_tag = starpu_data_get_tag(data_handles[x]);
-				STARPU_ASSERT(mpi_tag >= 0);
+				STARPU_ASSERT_MSG(mpi_tag >= 0, "Invalid tag for data handle");
 				if ((rank == root) && (owner != root))
 				{
 					callback_arg->count ++;
@@ -146,7 +146,7 @@ int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, in
 		{
 			int owner = starpu_data_get_rank(data_handles[x]);
 			int mpi_tag = starpu_data_get_tag(data_handles[x]);
-			STARPU_ASSERT(mpi_tag >= 0);
+			STARPU_ASSERT_MSG(mpi_tag >= 0, "Invalid tag for data handle");
 			if ((rank == root) && (owner != root))
 			{
 				//fprintf(stderr, "[%d] Receiving data[%d] from %d\n", rank, x, owner);

+ 12 - 12
mpi/src/starpu_mpi_datatype.c

@@ -34,10 +34,10 @@ static void handle_to_datatype_matrix(starpu_data_handle_t data_handle, MPI_Data
 	size_t elemsize = starpu_matrix_get_elemsize(data_handle);
 
 	ret = MPI_Type_vector(ny, nx*elemsize, ld*elemsize, MPI_BYTE, datatype);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_vector failed");
 
 	ret = MPI_Type_commit(datatype);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed");
 }
 
 /*
@@ -57,16 +57,16 @@ static void handle_to_datatype_block(starpu_data_handle_t data_handle, MPI_Datat
 
 	MPI_Datatype datatype_2dlayer;
 	ret = MPI_Type_vector(ny, nx*elemsize, ldy*elemsize, MPI_BYTE, &datatype_2dlayer);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_vector failed");
 
 	ret = MPI_Type_commit(&datatype_2dlayer);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed");
 
 	ret = MPI_Type_hvector(nz, 1, ldz*elemsize, datatype_2dlayer, datatype);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_hvector failed");
 
 	ret = MPI_Type_commit(datatype);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed");
 }
 
 /*
@@ -81,10 +81,10 @@ static void handle_to_datatype_vector(starpu_data_handle_t data_handle, MPI_Data
 	size_t elemsize = starpu_vector_get_elemsize(data_handle);
 
 	ret = MPI_Type_contiguous(nx*elemsize, MPI_BYTE, datatype);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed");
 
 	ret = MPI_Type_commit(datatype);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed");
 }
 
 /*
@@ -98,10 +98,10 @@ static void handle_to_datatype_variable(starpu_data_handle_t data_handle, MPI_Da
 	size_t elemsize = starpu_variable_get_elemsize(data_handle);
 
 	ret = MPI_Type_contiguous(elemsize, MPI_BYTE, datatype);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_contiguous failed");
 
 	ret = MPI_Type_commit(datatype);
-	STARPU_ASSERT(ret == MPI_SUCCESS);
+	STARPU_ASSERT_MSG(ret == MPI_SUCCESS, "MPI_Type_commit failed");
 }
 
 /*
@@ -127,7 +127,7 @@ void _starpu_mpi_handle_allocate_datatype(starpu_data_handle_t data_handle, MPI_
 	if (id < STARPU_MAX_INTERFACE_ID)
 	{
 		handle_to_datatype_func func = handle_to_datatype_funcs[id];
-		STARPU_ASSERT(func);
+		STARPU_ASSERT_MSG(func, "Handle To Datatype Function not defined for StarPU data interface %d", id);
 		func(data_handle, datatype);
 		*user_datatype = 0;
 	}
@@ -188,7 +188,7 @@ void _starpu_mpi_handle_free_datatype(starpu_data_handle_t data_handle, MPI_Data
 	if (id < STARPU_MAX_INTERFACE_ID)
 	{
 		handle_free_datatype_func func = handle_free_datatype_funcs[id];
-		STARPU_ASSERT(func);
+		STARPU_ASSERT_MSG(func, "Handle free datatype function not defined for StarPU data interface %d", id);
 		func(datatype);
 	}
 	/* else the datatype is not predefined by StarPU */

+ 2 - 3
mpi/src/starpu_mpi_insert_task.c

@@ -398,7 +398,7 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 			starpu_data_handle_t data = va_arg(varg_list, starpu_data_handle_t);
 			xrank = starpu_data_get_rank(data);
 			_STARPU_MPI_DEBUG(1, "Executing on data node %d\n", xrank);
-			STARPU_ASSERT(xrank <= nb_nodes);
+			STARPU_ASSERT_MSG(xrank <= nb_nodes, "Node %d to execute codelet is not a valid node (%d)", xrank, nb_nodes);
 			do_execute = 1;
 		}
 		else if (arg_type==STARPU_R || arg_type==STARPU_W || arg_type==STARPU_RW || arg_type==STARPU_SCRATCH || arg_type==STARPU_REDUX)
@@ -597,8 +597,7 @@ int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
 		va_start(varg_list, codelet);
 		struct starpu_task *task = starpu_task_create();
 		int ret = _starpu_insert_task_create_and_submit(arg_buffer, arg_buffer_size, codelet, &task, varg_list);
-		_STARPU_MPI_DEBUG(1, "ret: %d\n", ret);
-		STARPU_ASSERT(ret==0);
+		STARPU_ASSERT_MSG(ret==0, "_starpu_insert_task_create_and_submit failure %d", ret);
 	}
 
 	if (inconsistent_execute)