| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443 | /* * This file is part of the StarPU Handbook. * Copyright (C) 2009--2011  Universit@'e de Bordeaux * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015  CNRS * Copyright (C) 2011, 2012 INRIA * See the file version.doxy for copying conditions. *//*! \defgroup API_MPI_Support MPI Support@name Initialisation\ingroup API_MPI_Support\def STARPU_USE_MPI\ingroup API_MPI_SupportThis macro is defined when StarPU has been installed with MPIsupport. 
It should be used in your code to detect the availability of
MPI.

\fn int starpu_mpi_init_comm(int *argc, char ***argv, int initialize_mpi, MPI_Comm comm)
\ingroup API_MPI_Support
Initializes the starpumpi library with the given communicator.
\p initialize_mpi indicates if MPI should be initialized or not by StarPU.
If the value is not 0, MPI will be initialized by calling
<c>MPI_Init_thread(argc, argv, MPI_THREAD_SERIALIZED, ...)</c>.

\fn int starpu_mpi_init(int *argc, char ***argv, int initialize_mpi)
\ingroup API_MPI_Support
Call starpu_mpi_init_comm() with the MPI communicator MPI_COMM_WORLD.

\fn int starpu_mpi_initialize(void)
\deprecated
\ingroup API_MPI_Support
This function is deprecated. One should use the
function starpu_mpi_init() instead. This function does not call MPI_Init(), which
should be called beforehand.

\fn int starpu_mpi_initialize_extended(int *rank, int *world_size)
\deprecated
\ingroup API_MPI_Support
This function is deprecated. One should use the
function starpu_mpi_init() instead. MPI will be initialized by starpumpi by
calling <c>MPI_Init_thread(argc, argv, MPI_THREAD_SERIALIZED,...)</c>.

\fn int starpu_mpi_shutdown(void)
\ingroup API_MPI_Support
Cleans the starpumpi library. This must be called between calling
starpu_mpi functions and starpu_shutdown(). MPI_Finalize() will be
called if StarPU-MPI has been initialized by starpu_mpi_init().

\fn void starpu_mpi_comm_amounts_retrieve(size_t *comm_amounts)
\ingroup API_MPI_Support
Retrieve the current amount of communications from the current node in
the array \p comm_amounts which must have a size greater than or equal to
the world size.
Communication statistics must be enabled (see
\ref STARPU_COMM_STATS).

@name Communication
\anchor MPIPtpCommunication
\ingroup API_MPI_Support

\fn int starpu_mpi_send(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm)
\ingroup API_MPI_Support
Performs a standard-mode, blocking send of \p data_handle to the node
\p dest using the message tag \p mpi_tag within the communicator
\p comm.

\fn int starpu_mpi_recv(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, MPI_Status *status)
\ingroup API_MPI_Support
Performs a standard-mode, blocking receive in \p data_handle from the
node \p source using the message tag \p mpi_tag within the
communicator \p comm.

\fn int starpu_mpi_isend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm)
\ingroup API_MPI_Support
Posts a standard-mode, non blocking send of \p data_handle to the node
\p dest using the message tag \p mpi_tag within the communicator
\p comm. After the call, the pointer to the request \p req can be used to
test or to wait for the completion of the communication.

\fn int starpu_mpi_irecv(starpu_data_handle_t data_handle, starpu_mpi_req *req, int source, int mpi_tag, MPI_Comm comm)
\ingroup API_MPI_Support
Posts a nonblocking receive in \p data_handle from the node \p source
using the message tag \p mpi_tag within the communicator \p comm.
After the call, the pointer to the request \p req can be used to test
or to wait for the completion of the communication.

\fn int starpu_mpi_isend_detached(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
\ingroup API_MPI_Support
Posts a standard-mode, non blocking send of \p data_handle to the node
\p dest using the message tag \p mpi_tag within the communicator
\p comm.
On completion, the \p callback function is called with the
argument \p arg.
Similarly to the pthread detached functionality, when a detached
communication completes, its resources are automatically released back
to the system; there is no need to test or to wait for the completion
of the request.

\fn int starpu_mpi_irecv_detached(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
\ingroup API_MPI_Support
Posts a nonblocking receive in \p data_handle from the node \p source
using the message tag \p mpi_tag within the communicator \p comm. On
completion, the \p callback function is called with the argument
\p arg.
Similarly to the pthread detached functionality, when a detached
communication completes, its resources are automatically released back
to the system; there is no need to test or to wait for the completion
of the request.

\fn int starpu_mpi_irecv_detached_sequential_consistency(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg, int sequential_consistency)
\ingroup API_MPI_Support
Posts a nonblocking receive in \p data_handle from the node \p source
using the message tag \p mpi_tag within the communicator \p comm.
On completion, the \p callback function is called with the argument
\p arg.
The parameter \p sequential_consistency makes it possible to enable or disable
the sequential consistency for \p data_handle (sequential consistency
will be enabled or disabled based on the value of the parameter
\p sequential_consistency and the value of the sequential consistency
defined for \p data_handle).
Similarly to the pthread detached functionality, when a detached
communication completes, its resources are automatically released back
to the system; there is no need to test or to wait for the completion
of the request.

\fn int starpu_mpi_issend(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm)
\ingroup API_MPI_Support
Performs a synchronous-mode, non-blocking send of \p data_handle to the node
\p dest using the message tag \p mpi_tag within the communicator
\p comm.

\fn int starpu_mpi_issend_detached(starpu_data_handle_t data_handle, starpu_mpi_req *req, int dest, int mpi_tag, MPI_Comm comm, void (*callback)(void *), void *arg)
\ingroup API_MPI_Support
Performs a synchronous-mode, non-blocking send of \p data_handle to the node
\p dest using the message tag \p mpi_tag within the communicator
\p comm.
On completion, the \p callback function is called with the argument
\p arg.
Similarly to the pthread detached functionality, when a detached
communication completes, its resources are automatically released back
to the system; there is no need to test or to wait for the completion
of the request.

\fn int starpu_mpi_wait(starpu_mpi_req *req, MPI_Status *status)
\ingroup API_MPI_Support
Returns when the operation identified by request \p req is complete.

\fn int starpu_mpi_test(starpu_mpi_req *req, int *flag, MPI_Status *status)
\ingroup API_MPI_Support
If the operation identified by \p req is complete, set \p flag to 1.
The \p status object is set to contain information on the completed
operation.

\fn int starpu_mpi_barrier(MPI_Comm comm)
\ingroup API_MPI_Support
Blocks the caller until all group members of the communicator \p comm
have called it.

\fn int starpu_mpi_isend_detached_unlock_tag(starpu_data_handle_t data_handle, int dest, int mpi_tag, MPI_Comm comm, starpu_tag_t tag)
\ingroup API_MPI_Support
Posts a standard-mode, non blocking send of \p data_handle to the node
\p dest using the message tag \p mpi_tag within the communicator
\p comm. On completion, \p tag is unlocked.

\fn int starpu_mpi_irecv_detached_unlock_tag(starpu_data_handle_t data_handle, int source, int mpi_tag, MPI_Comm comm, starpu_tag_t tag)
\ingroup API_MPI_Support
Posts a nonblocking receive in \p data_handle from the node \p source
using the message tag \p mpi_tag within the communicator \p comm. On
completion, \p tag is unlocked.

\fn int starpu_mpi_isend_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *dest, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag)
\ingroup API_MPI_Support
Posts \p array_size standard-mode, non blocking sends. Each post sends
the n-th data of the array \p data_handle to the n-th node of the
array \p dest using the n-th message tag of the array \p mpi_tag
within the n-th communicator of the array \p comm.
On completion of
all the requests, \p tag is unlocked.

\fn int starpu_mpi_irecv_array_detached_unlock_tag(unsigned array_size, starpu_data_handle_t *data_handle, int *source, int *mpi_tag, MPI_Comm *comm, starpu_tag_t tag)
\ingroup API_MPI_Support
Posts \p array_size nonblocking receives. Each post receives in the n-th
data of the array \p data_handle from the n-th node of the array
\p source using the n-th message tag of the array \p mpi_tag within the
n-th communicator of the array \p comm. On completion of all the
requests, \p tag is unlocked.

\fn int starpu_mpi_get_communication_tag(void)
\ingroup API_MPI_Support
todo

\fn void starpu_mpi_set_communication_tag(int tag)
\ingroup API_MPI_Support
todo

@name Communication Cache
\ingroup API_MPI_Support

\fn int starpu_mpi_cache_is_enabled()
\ingroup API_MPI_Support
Return 1 if the communication cache is enabled, 0 otherwise.

\fn int starpu_mpi_cache_set(int enabled)
\ingroup API_MPI_Support
If \p enabled is 1, enable the communication cache. Otherwise, clean the cache if it was enabled and disable it.

\fn void starpu_mpi_cache_flush(MPI_Comm comm, starpu_data_handle_t data_handle)
\ingroup API_MPI_Support
Clear the send and receive communication cache for the data
\p data_handle and invalidate the value. The function has to be called synchronously by all the
MPI nodes. The function does nothing if the cache mechanism is
disabled (see \ref STARPU_MPI_CACHE).

\fn void starpu_mpi_cache_flush_all_data(MPI_Comm comm)
\ingroup API_MPI_Support
Clear the send and receive communication cache for all data and invalidate their values. The
function has to be called synchronously by all the MPI nodes.
The
function does nothing if the cache mechanism is disabled (see
\ref STARPU_MPI_CACHE).

@name MPI Insert Task
\anchor MPIInsertTask
\ingroup API_MPI_Support

\fn void starpu_mpi_data_register_comm(starpu_data_handle_t data_handle, int tag, int rank, MPI_Comm comm)
\ingroup API_MPI_Support
Register to MPI a StarPU data handle with the given tag, rank and MPI communicator.
It also automatically clears the MPI communication cache when unregistering the data.

\def starpu_mpi_data_register(data_handle, tag, rank)
\ingroup API_MPI_Support
Register to MPI a StarPU data handle with the given tag, rank and the MPI communicator MPI_COMM_WORLD.
It also automatically clears the MPI communication cache when unregistering the data.

\fn void starpu_mpi_data_set_tag(starpu_data_handle_t handle, int tag)
\ingroup API_MPI_Support
Register to MPI a StarPU data handle with the given tag. No rank will be defined.
It also automatically clears the MPI communication cache when unregistering the data.

\def starpu_data_set_tag
\ingroup API_MPI_Support
Symbol kept for backward compatibility. Calls the function starpu_mpi_data_set_tag().

\fn void starpu_mpi_data_set_rank_comm(starpu_data_handle_t handle, int rank, MPI_Comm comm)
\ingroup API_MPI_Support
Register to MPI a StarPU data handle with the given rank and given communicator. No tag will be defined.
It also automatically clears the MPI communication cache when unregistering the data.

\def starpu_mpi_data_set_rank
\ingroup API_MPI_Support
Register to MPI a StarPU data handle with the given rank and the MPI communicator MPI_COMM_WORLD. No tag will be defined.
It also automatically clears the MPI communication cache when unregistering the data.
Symbol kept for backward compatibility. Calls the function starpu_mpi_data_set_rank_comm() with the communicator MPI_COMM_WORLD.

\def starpu_data_set_rank
\ingroup API_MPI_Support
Register to MPI a StarPU data handle with the given rank and the MPI communicator MPI_COMM_WORLD.
No tag will be defined.
It also automatically clears the MPI communication cache when unregistering the data.
Symbol kept for backward compatibility. Calls the function starpu_mpi_data_set_rank().

\fn int starpu_mpi_data_get_rank(starpu_data_handle_t handle)
\ingroup API_MPI_Support
Return the rank of the given data.

\def starpu_data_get_rank(handle)
\ingroup API_MPI_Support
Return the rank of the given data.
Symbol kept for backward compatibility. Calls the function starpu_mpi_data_get_rank().

\fn int starpu_mpi_data_get_tag(starpu_data_handle_t handle)
\ingroup API_MPI_Support
Return the tag of the given data.

\def starpu_data_get_tag(handle)
\ingroup API_MPI_Support
Return the tag of the given data.
Symbol kept for backward compatibility. Calls the function starpu_mpi_data_get_tag().

\def STARPU_EXECUTE_ON_NODE
\ingroup API_MPI_Support
This macro is used when calling starpu_mpi_task_insert(), and must be
followed by an integer value which specifies the node on which to
execute the codelet.

\def STARPU_EXECUTE_ON_DATA
\ingroup API_MPI_Support
This macro is used when calling starpu_mpi_task_insert(), and must be
followed by a data handle to specify that the node owning the given
data will execute the codelet.

\fn int starpu_mpi_insert_task(MPI_Comm comm, struct starpu_codelet *codelet, ...)
\ingroup API_MPI_Support
This function does the same as the function starpu_mpi_task_insert(). It has been kept to avoid breaking old code.

\fn int starpu_mpi_task_insert(MPI_Comm comm, struct starpu_codelet *codelet, ...)
\ingroup API_MPI_Support
Create and submit a task corresponding to codelet with the following
arguments. The argument list must be zero-terminated.
The arguments following the codelet are of the same types as for the
function starpu_task_insert().
Access modes for data can also be set
with ::STARPU_SSEND to specify the data has to be sent using a
synchronous and non-blocking mode (see starpu_mpi_issend()).
The extra argument
::STARPU_EXECUTE_ON_NODE followed by an integer specifies the
MPI node to execute the codelet. It is also possible to specify that
the node owning a specific data will execute the codelet, by using
::STARPU_EXECUTE_ON_DATA followed by a data handle.

The internal algorithm is as follows:
<ol>
<li>
        Find out which MPI node is going to execute the codelet.
        <ul>
            <li>If there is only one node owning data in ::STARPU_W mode, it will be selected;
            <li>If there are several nodes owning data in ::STARPU_W mode, a node will be selected according to a given node selection policy (see ::STARPU_NODE_SELECTION_POLICY or starpu_mpi_node_selection_set_current_policy());
            <li>The argument ::STARPU_EXECUTE_ON_NODE followed by an integer can be used to specify the node;
            <li>The argument ::STARPU_EXECUTE_ON_DATA followed by a data handle can be used to specify that the node owning the given data will execute the codelet.
        </ul>
</li>
<li>
        Send and receive data as requested. Nodes owning data which need to be read by the task send them to the MPI node which will execute it. The latter receives them.
</li>
<li>
        Execute the codelet. This is done by the MPI node selected in the 1st step of the algorithm.
</li>
<li>
        If several MPI nodes own data to be written to, send written data back to their owners.
</li>
</ol>

The algorithm also includes a communication cache mechanism that
avoids sending data twice to the same MPI node, unless the data
has been modified.
The cache can be disabled (see \ref STARPU_MPI_CACHE).

\fn struct starpu_task *starpu_mpi_task_build(MPI_Comm comm, struct starpu_codelet *codelet, ...)
\ingroup API_MPI_Support
Create a task corresponding to codelet with the following arguments.
The argument list must be zero-terminated. The function performs the
first two steps of the function starpu_mpi_task_insert(). Only the MPI
node selected in the first step of the algorithm will return a valid
task structure which can then be submitted. The function
starpu_mpi_task_post_build() MUST be called after the submission of
the task, with the SAME list of arguments.

\fn int starpu_mpi_task_post_build(MPI_Comm comm, struct starpu_codelet *codelet, ...)
\ingroup API_MPI_Support
This function MUST be called after a call to starpu_mpi_task_build(),
with the SAME list of arguments. It performs the fourth -- last -- step of the algorithm described in
starpu_mpi_task_insert().

\fn void starpu_mpi_get_data_on_node(MPI_Comm comm, starpu_data_handle_t data_handle, int node)
\ingroup API_MPI_Support
Transfer data \p data_handle to MPI node \p node, sending it from its
owner if needed. At least the target node and the owner have to call
the function.

\fn void starpu_mpi_get_data_on_node_detached(MPI_Comm comm, starpu_data_handle_t data_handle, int node, void (*callback)(void*), void *arg)
\ingroup API_MPI_Support
Transfer data \p data_handle to MPI node \p node, sending it from its
owner if needed. At least the target node and the owner have to call
the function. On reception, the \p callback function is called with
the argument \p arg.

@name Node Selection Policy
\anchor MPINodeSelectionPolicy
\ingroup API_MPI_Support

\fn int starpu_mpi_node_selection_get_current_policy()
\ingroup API_MPI_Support
Return the current policy used to select the node which will execute the codelet.

\fn int starpu_mpi_node_selection_set_current_policy(int policy)
\ingroup API_MPI_Support
Set the current policy used to select the node which will
execute the codelet.
The policy STARPU_MPI_NODE_SELECTION_MOST_R_DATA selects the
node having the most data in R mode so as to minimize the amount of
data to be transferred.

\fn int starpu_mpi_node_selection_register_policy(starpu_mpi_select_node_policy_func_t policy_func)
\ingroup API_MPI_Support
Register a new policy which can then be used when there are several nodes owning data in W mode.

Here is an example of a function defining a node selection policy.
The codelet will be executed on the node owning the first data with a size bigger than 1M, or on node 0 if no data fits the given size.

\code{.c}
int my_node_selection_policy(int me, int nb_nodes, struct starpu_data_descr *descr, int nb_data)
{
	// me is the current MPI rank
	// nb_nodes is the number of MPI nodes
	// descr is the description of the data specified when calling starpu_mpi_task_insert
	// nb_data is the number of data in descr
	int i;
	for(i= 0 ; i<nb_data ; i++)
	{
		starpu_data_handle_t data = descr[i].handle;
		enum starpu_data_access_mode mode = descr[i].mode;
		if (mode & STARPU_R)
		{
			int rank = starpu_data_get_rank(data);
			size_t size = starpu_data_get_size(data);
			if (size > 1024*1024) return rank;
		}
	}
	return 0;
}
\endcode

\fn int starpu_mpi_node_selection_unregister_policy(int policy)
\ingroup API_MPI_Support
Unregister a previously registered policy.

@name Collective Operations
\anchor MPICollectiveOperations
\ingroup API_MPI_Support

\fn void starpu_mpi_redux_data(MPI_Comm comm, starpu_data_handle_t data_handle)
\ingroup API_MPI_Support
Perform a reduction on the given data. All nodes send the data to its
owner node which will perform a reduction.

\fn int starpu_mpi_scatter_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
\ingroup API_MPI_Support
Scatter data among processes of the communicator based on the
ownership of the data.
For each data of the array \p data_handles, the
process \p root sends the data to the process owning this data. Processes
receiving data must have valid data handles to receive them. On
completion of the collective communication, the \p scallback function is
called with the argument \p sarg on the process \p root; the
\p rcallback function is called with the argument \p rarg on any other
process.

\fn int starpu_mpi_gather_detached(starpu_data_handle_t *data_handles, int count, int root, MPI_Comm comm, void (*scallback)(void *), void *sarg, void (*rcallback)(void *), void *rarg)
\ingroup API_MPI_Support
Gather data from the different processes of the communicator onto the
process \p root. Each process owning data handles in the array
\p data_handles will send them to the process \p root. The process
\p root must have valid data handles to receive the data. On completion
of the collective communication, the \p rcallback function is called
with the argument \p rarg on the process \p root; the \p scallback
function is called with the argument \p sarg on any other process.

*/
 |