/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009, 2010, 2014-2015, 2017 Université de Bordeaux * Copyright (C) 2010, 2011, 2012, 2013, 2014, 2015 CNRS * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "mpi_cholesky.h" #include #include #include /* * Create the codelets */ static struct starpu_codelet cl11 = { .cpu_funcs = {chol_cpu_codelet_update_u11}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_u11}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .nbuffers = 1, .modes = {STARPU_RW}, .model = &chol_model_11 }; static struct starpu_codelet cl21 = { .cpu_funcs = {chol_cpu_codelet_update_u21}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_u21}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .nbuffers = 2, .modes = {STARPU_R, STARPU_RW}, .model = &chol_model_21 }; static struct starpu_codelet cl22 = { .cpu_funcs = {chol_cpu_codelet_update_u22}, #ifdef STARPU_USE_CUDA .cuda_funcs = {chol_cublas_codelet_update_u22}, #elif defined(STARPU_SIMGRID) .cuda_funcs = {(void*)1}, #endif .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_RW | STARPU_COMMUTE}, .model = &chol_model_22 }; /* * code to bootstrap the factorization * and construct the DAG */ void dw_cholesky(float ***matA, unsigned ld, int rank, int nodes, double *timing, double *flops) { double start; double end; starpu_data_handle_t **data_handles; unsigned x,y,i,j,k; unsigned unbound_prio = STARPU_MAX_PRIO == INT_MAX && STARPU_MIN_PRIO == INT_MIN; /* create all the DAG nodes */ data_handles = malloc(nblocks*sizeof(starpu_data_handle_t *)); for(x=0 ; x j) { rmat[j+i*size] = 0.0f; // debug } } } float *test_mat = malloc(size*size*sizeof(float)); STARPU_ASSERT(test_mat); STARPU_SSYRK("L", "N", size, size, 1.0f, rmat, size, 0.0f, test_mat, size); FPRINTF(stderr, "[%d] comparing results ...\n", rank); if (display) { for (j = 0; j < size; j++) { for (i = 0; i < size; i++) { if (i <= j) { printf("%2.2f\t", test_mat[j +i*size]); } else { printf(".\t"); } } printf("\n"); } } *correctness = 1; for(x = 0; x < nblocks ; x++) { for (y = 0; y < nblocks; y++) { int mpi_rank = my_distrib(x, y, nodes); if (mpi_rank == rank) { for (i = (size/nblocks)*x ; i < (size/nblocks)*x+(size/nblocks); i++) { for (j = (size/nblocks)*y ; j < (size/nblocks)*y+(size/nblocks); j++) { if (i <= j) { float orig = (1.0f/(1.0f+i+j)) + ((i == j)?1.0f*size:0.0f); float err = abs(test_mat[j +i*size] - orig); if (err > 0.00001) { FPRINTF(stderr, "[%d] Error[%u, %u] --> %2.2f != %2.2f (err %2.2f)\n", rank, i, j, test_mat[j +i*size], orig, err); *correctness = 0; *flops = 0; break; } } } } } } } free(rmat); free(test_mat); }