/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2009-2015 Université de Bordeaux * Copyright (C) 2010 Mehdi Juhoor * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include "cholesky.h" #include "../sched_ctx_utils/sched_ctx_utils.h" #if defined(STARPU_USE_CUDA) && defined(STARPU_HAVE_MAGMA) #include "magma.h" #endif /* * code to bootstrap the factorization * and construct the DAG */ static void callback_turn_spmd_on(void *arg STARPU_ATTRIBUTE_UNUSED) { cl22.type = STARPU_SPMD; } static int _cholesky(starpu_data_handle_t dataA, unsigned nblocks) { int ret; double start; double end; unsigned i,j,k; unsigned long n = starpu_matrix_get_nx(dataA); unsigned long nn = n/nblocks; int prio_level = noprio?STARPU_DEFAULT_PRIO:STARPU_MAX_PRIO; if (bound || bound_lp || bound_mps) starpu_bound_start(bound_deps, 0); starpu_fxt_start_profiling(); start = starpu_timing_now(); /* create all the DAG nodes */ for (k = 0; k < nblocks; k++) { starpu_data_handle_t sdatakk = starpu_data_get_sub_data(dataA, 2, k, k); ret = starpu_task_insert(&cl11, STARPU_PRIORITY, prio_level, STARPU_RW, sdatakk, STARPU_CALLBACK, (k == 3*nblocks/4)?callback_turn_spmd_on:NULL, STARPU_FLOPS, (double) FLOPS_SPOTRF(nn), STARPU_TAG_ONLY, TAG11(k), 0); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert"); for (j = k+1; j j) { mat[j+i*size] = 0.0f; /* debug */ } } } float *test_mat = malloc(size*size*sizeof(float)); STARPU_ASSERT(test_mat); STARPU_SSYRK("L", "N", size, size, 1.0f, mat, size, 0.0f, test_mat, size); FPRINTF(stderr, "comparing results ...\n"); #ifdef PRINT_OUTPUT for (j = 0; j < size; j++) { for (i = 0; i < size; i++) { if (i <= j) { FPRINTF(stdout, "%2.2f\t", test_mat[j +i*size]); } else { FPRINTF(stdout, ".\t"); } } FPRINTF(stdout, "\n"); } #endif for (j = 0; j < size; j++) { for (i = 0; i < size; i++) { if (i <= j) { float orig = (1.0f/(1.0f+i+j)) + ((i == j)?1.0f*size:0.0f); float err = abs(test_mat[j +i*size] - orig); if (err > 0.00001) { FPRINTF(stderr, "Error[%u, %u] --> %2.2f != %2.2f (err %2.2f)\n", i, j, test_mat[j +i*size], orig, err); assert(0); } } } } free(test_mat); } starpu_free(mat); } int main(int argc, char **argv) { /* create a simple definite positive symetric matrix example * * Hilbert matrix : h(i,j) = 1/(i+j+1) * */ parse_args(argc, argv); if(with_ctxs || with_noctxs || chole1 || chole2) parse_args_ctx(argc, argv); #ifdef STARPU_HAVE_MAGMA magma_init(); #endif int ret; ret = starpu_init(NULL); starpu_fxt_stop_profiling(); if (ret == -ENODEV) return 77; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_CUDA initialize_chol_model(&chol_model_11,"chol_model_11",cpu_chol_task_11_cost,cuda_chol_task_11_cost); initialize_chol_model(&chol_model_21,"chol_model_21",cpu_chol_task_21_cost,cuda_chol_task_21_cost); initialize_chol_model(&chol_model_22,"chol_model_22",cpu_chol_task_22_cost,cuda_chol_task_22_cost); #else initialize_chol_model(&chol_model_11,"chol_model_11",cpu_chol_task_11_cost,NULL); initialize_chol_model(&chol_model_21,"chol_model_21",cpu_chol_task_21_cost,NULL); initialize_chol_model(&chol_model_22,"chol_model_22",cpu_chol_task_22_cost,NULL); #endif starpu_cublas_init(); if(with_ctxs) { construct_contexts(execute_cholesky); start_2benchs(execute_cholesky); } else if(with_noctxs) start_2benchs(execute_cholesky); else if(chole1) start_1stbench(execute_cholesky); else if(chole2) start_2ndbench(execute_cholesky); else execute_cholesky(size, nblocks); starpu_cublas_shutdown(); starpu_shutdown(); return ret; }