/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015, 2016, 2017 CNRS * Copyright (C) 2015 INRIA * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* NOTE(review): the header names of the first two include directives below are missing in this copy -- restore them from the upstream file. */ #include #include #include "helper.h" #define NUM_EL 5 #define NUM_LOOPS 10 /* * This test case, written by J-M Couteyen, checks that several * early requests for a given source and tag can be posted to StarPU * by the application before the data arrives. * * In this test case, multiple processes (called "domains") exchange * information between multiple "elements" multiple times, with * different sizes (in order to catch errors more easily). * The communications are independent between the elements (each one * has its own tag), but must occur in the submitted order for any * single element. 
*/ /* State of one exchanged element. In insert_work_for_one_element, `tag` is used both as the number of ints in the temporary vectors and as the tag argument of the send/receive calls, and `foreign_domain` is the peer passed to those calls. */ struct element { int tag; int foreign_domain; int array_send[100]; int array_recv[100]; starpu_data_handle_t ensure_submitted_order_send; starpu_data_handle_t ensure_submitted_order_recv; starpu_data_handle_t send; starpu_data_handle_t recv; }; /* functions/codelet to fill the buffers */ /* NOTE(review): this copy is damaged from the loop header of fill_tmp_buffer onwards: the loop body, any definitions that followed it, and the header of the function that assigns el->tag are missing. Restore them from the upstream file before building. */ void fill_tmp_buffer(void *buffers[], void *cl_arg) { int *tmp = (int *) STARPU_VECTOR_GET_PTR(buffers[0]); int nx = STARPU_VECTOR_GET_NX(buffers[0]); int i; for (i=0; itag=size; el->foreign_domain=foreign_domain; int mpi_rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank); /* Register the element's two int arrays as vectors of `size` ints, plus two void handles that insert_work_for_one_element passes as STARPU_RW to the submitted_order tasks. */ starpu_vector_data_register(&el->recv, 0, (uintptr_t)el->array_recv, size, sizeof(int)); starpu_vector_data_register(&el->send, 0, (uintptr_t)el->array_send, size, sizeof(int)); starpu_void_data_register(&el->ensure_submitted_order_send); starpu_void_data_register(&el->ensure_submitted_order_recv); } /* Unregister the four data handles held by an element. */ void free_element(struct element *el) { starpu_data_unregister(el->recv); starpu_data_unregister(el->send); starpu_data_unregister(el->ensure_submitted_order_send); starpu_data_unregister(el->ensure_submitted_order_recv); } /* Submit one send/receive round for an element: register two temporary vectors of el->tag ints, fill and send one, receive into the other, submit a task that reads the received vector, then submit the unregistration of both temporaries. */ void insert_work_for_one_element(struct element *el) { starpu_data_handle_t tmp_recv; starpu_data_handle_t tmp_send; starpu_vector_data_register(&tmp_recv, -1, 0, el->tag, sizeof(int)); starpu_vector_data_register(&tmp_send, -1, 0, el->tag, sizeof(int)); //Emulate the work to fill the send buffer starpu_insert_task(&fill_tmp_buffer_cl, STARPU_W,tmp_send, 0); //Send operation starpu_insert_task(&submitted_order, STARPU_RW,el->ensure_submitted_order_send, STARPU_W,tmp_send, 0); starpu_mpi_isend_detached(tmp_send,el->foreign_domain,el->tag, MPI_COMM_WORLD, NULL, NULL); //Recv operation for current element starpu_insert_task(&submitted_order, STARPU_RW,el->ensure_submitted_order_recv, STARPU_W,tmp_recv, 0); starpu_mpi_irecv_detached(tmp_recv,el->foreign_domain,el->tag, MPI_COMM_WORLD, NULL, NULL); //Emulate the "reading" of the recv value. 
starpu_insert_task(&read_ghost_value_cl, STARPU_R,tmp_recv, 0); starpu_data_unregister_submit(tmp_send); starpu_data_unregister_submit(tmp_recv); } /* Main program: initializes MPI, StarPU and StarPU-MPI, returns STARPU_TEST_SKIPPED when no CPU worker is available, then allocates NUM_EL "left" and NUM_EL "right" elements. NOTE(review): this copy ends in the middle of the element-initialization loop, so the rest of main is not visible here. */ int main(int argc, char * argv[]) { /* Init: MPI first, then StarPU, then StarPU-MPI */ int ret; int mpi_rank, mpi_size; int mpi_init; MPI_INIT_THREAD(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_init); ret = starpu_init(NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); ret = starpu_mpi_init(&argc, &argv, mpi_init); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &mpi_rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &mpi_size); /* Without any CPU worker the test is skipped; only rank 0 prints the reason. */ if (starpu_cpu_worker_get_count() == 0) { if (mpi_rank == 0) FPRINTF(stderr, "We need at least 1 CPU worker.\n"); starpu_mpi_shutdown(); starpu_shutdown(); if (!mpi_init) MPI_Finalize(); return STARPU_TEST_SKIPPED; } /* Element initialization: domains are connected as a ring for this test */ int num_elements=NUM_EL; struct element * el_left=malloc(num_elements*sizeof(el_left[0])); struct element * el_right=malloc(num_elements*sizeof(el_right[0])); int i; for(i=0;i