/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2011 Université de Bordeaux 1 * Copyright (C) 2010 Mehdi Juhoor * Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "mpi_cholesky.h" #include "mpi_cholesky_models.h" /* * Create the codelets */ static struct starpu_codelet cl11 = { .where = STARPU_CPU|STARPU_CUDA, .cpu_funcs = {chol_cpu_codelet_update_u11, NULL}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_u11, NULL}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .model = &chol_model_11 }; static struct starpu_codelet cl21 = { .where = STARPU_CPU|STARPU_CUDA, .cpu_funcs = {chol_cpu_codelet_update_u21, NULL}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_u21, NULL}, #endif .nbuffers = 2, .modes = {STARPU_R, STARPU_RW}, .model = &chol_model_21 }; static struct starpu_codelet cl22 = { .where = STARPU_CPU|STARPU_CUDA, .cpu_funcs = {chol_cpu_codelet_update_u22, NULL}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_u22, NULL}, #endif .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_RW}, .model = &chol_model_22 }; /* Returns the MPI node number where data indexes index is */ int my_distrib(int x, int y, int nb_nodes) { return (x+y) % nb_nodes; } /* * code to bootstrap the factorization * and construct the DAG */ static void dw_cholesky(float ***matA, unsigned size, unsigned ld, unsigned nblocks, int rank, int nodes) { struct timeval start; struct timeval end; starpu_data_handle_t **data_handles; int x, y; /* create all the DAG nodes */ unsigned i,j,k; data_handles = malloc(nblocks*sizeof(starpu_data_handle_t *)); for(x=0 ; x