/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2019-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This examplifies how to declare a new filter function. */ #include #define NX 20 #define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0) void cpu_func(void *buffers[], void *cl_arg) { unsigned i; int factor; int rank; starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); fprintf(stderr, "computing on rank %d\n", rank); unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); starpu_codelet_unpack_args(cl_arg, &factor); for (i = 0; i < n; i++) val[i] *= factor; } struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, .nbuffers = 1, .modes = {STARPU_RW}, .name = "vector_scal" }; void vector_filter(void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nchunks) { struct starpu_vector_interface *vector_father = (struct starpu_vector_interface *) father_interface; struct starpu_vector_interface *vector_child = (struct starpu_vector_interface *) child_interface; uint32_t nx = vector_father->nx; size_t elemsize = vector_father->elemsize; STARPU_ASSERT_MSG(nchunks <= nx, "%u parts for %u elements", nchunks, nx); STARPU_ASSERT(nchunks == 2); STARPU_ASSERT_MSG((nx % nchunks) == 0, "nx=%d is not a multiple of nchunks %d\n", nx, nchunks); vector_child->id = vector_father->id; vector_child->nx = nx/2; vector_child->elemsize = elemsize; vector_child->allocsize = vector_child->nx * elemsize; if (vector_father->dev_handle) { size_t offset = (id *(nx/nchunks)) * elemsize; if (vector_father->ptr) vector_child->ptr = vector_father->ptr + offset; vector_child->dev_handle = vector_father->dev_handle; vector_child->offset = vector_father->offset + offset; } } int main(int argc, char **argv) { int i, rank, nodes; int vector[NX]; int vector_check[NX]; starpu_data_handle_t vhandle; starpu_data_handle_t handles[2]; int factor[2] = {2, 3}; int ret; ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf"); starpu_mpi_comm_rank(MPI_COMM_WORLD, &rank); starpu_mpi_comm_size(MPI_COMM_WORLD, &nodes); if (nodes < 2 || (starpu_cpu_worker_get_count() == 0)) { if (rank == 0) { if (nodes < 2) fprintf(stderr, "We need at least 2 processes.\n"); else fprintf(stderr, "We need at least 1 CPU.\n"); } starpu_mpi_shutdown(); return 77; } for(i=0 ; i