/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
 * Copyright (C) 2010  Mehdi Juhoor
 * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

#include "cholesky.h"

/*
 *	Create the codelets
 *
 *	Each codelet names a CPU implementation and, only when STARPU_USE_CUDA
 *	is defined, a CUDA (CUBLAS) implementation. The kernels and the
 *	chol_model_* objects are not defined in this file (presumably they come
 *	from cholesky.h).
 */

/*
 * Codelet submitted by _cholesky() on the (k, k) sub-data of the matrix.
 * It is declared for CPU and CUDA, as a sequential task working on a single
 * data buffer.
 */
static starpu_codelet cl11 = {
	.where = STARPU_CPU|STARPU_CUDA,
	.type = STARPU_SEQ,
	.cpu_func = chol_cpu_codelet_update_u11,
#ifdef STARPU_USE_CUDA
	.cuda_func = chol_cublas_codelet_update_u11,
#endif
	.nbuffers = 1,
	.model = &chol_model_11
};

/*
 * Same layout as cl11 but working on two data buffers.
 * NOTE(review): the code that submits this codelet is not visible in this
 * copy of the file.
 */
static starpu_codelet cl21 = {
	.where = STARPU_CPU|STARPU_CUDA,
	.type = STARPU_SEQ,
	.cpu_func = chol_cpu_codelet_update_u21,
#ifdef STARPU_USE_CUDA
	.cuda_func = chol_cublas_codelet_update_u21,
#endif
	.nbuffers = 2,
	.model = &chol_model_21
};

/*
 * Codelet working on three data buffers. It starts out as a sequential
 * codelet; callback_turn_spmd_on() later rewrites its type to STARPU_SPMD,
 * which is why it is not const and why max_parallelism is already set here.
 */
static starpu_codelet cl22 = {
	.where = STARPU_CPU|STARPU_CUDA,
	.type = STARPU_SEQ,
	.max_parallelism = INT_MAX,
	.cpu_func = chol_cpu_codelet_update_u22,
#ifdef STARPU_USE_CUDA
	.cuda_func = chol_cublas_codelet_update_u22,
#endif
	.nbuffers = 3,
	.model = &chol_model_22
};

/*
 *	code to bootstrap the factorization
 *	and construct the DAG
 */

/*
 * Task callback that switches the cl22 codelet to STARPU_SPMD.
 * The argument is unused. _cholesky() attaches this callback to the cl11
 * task of iteration k == 3*nblocks/4.
 */
static void callback_turn_spmd_on(void *arg __attribute__ ((unused)))
{
	cl22.type = STARPU_SPMD;
}

/*
 * Submit the factorization tasks for the partitioned matrix dataA.
 *
 * dataA:   handle whose sub-data are fetched with two indices (k, k).
 * nblocks: number of iterations of the outer k loop.
 *
 * NOTE(review): only the beginning of this function is present in this copy
 * of the file; see the note inside the k loop.
 */
static void _cholesky(starpu_data_handle dataA, unsigned nblocks)
{
	struct timeval start;
	struct timeval end;

	unsigned i,j,k;

	/* noprio is declared elsewhere; when it is non-zero the default
	 * priority is used instead of the maximum one. */
	int prio_level = noprio?STARPU_DEFAULT_PRIO:STARPU_MAX_PRIO;

	/* Record the time at which task submission starts. */
	gettimeofday(&start, NULL);

	/* create all the DAG nodes */
	for (k = 0; k < nblocks; k++) {
		/* Sub-data of dataA at indices (k, k). */
		starpu_data_handle sdatakk = starpu_data_get_sub_data(dataA, 2, k, k);

		/* Submit cl11 on that sub-data in read-write mode. The SPMD
		 * callback is only attached for k == 3*nblocks/4 (integer
		 * arithmetic); every other iteration passes NULL. The final 0
		 * presumably terminates the argument list. */
		starpu_insert_task(&cl11,
				   STARPU_PRIORITY, prio_level,
				   STARPU_RW, sdatakk,
				   STARPU_CALLBACK, (k == 3*nblocks/4)?callback_turn_spmd_on:NULL,
				   0);

		/*
		 * NOTE(review): text appears to have been lost from this copy
		 * of the file between `j` and `j)` on the next line. The rest of
		 * _cholesky() and the beginning of the routine that follows
		 * (it ends with `return 0;`, presumably main) are missing, so
		 * the file does not compile as is. Restore the span from the
		 * upstream source before changing any code here.
		 *
		 * The statements below belong to that later routine: they set
		 * mat[j+i*size] to zero under a condition ending in `j)`.
		 */
		for (j = k+1; j j) {
					mat[j+i*size] = 0.0f; /* debug */
				}
			}
		}

		/* Scratch matrix of size*size floats receiving the SSYRK result. */
		float *test_mat = malloc(size*size*sizeof(float));
		STARPU_ASSERT(test_mat);

		/* Presumably the BLAS rank-k update test_mat = mat * mat^T on the
		 * lower part (alpha = 1, beta = 0) -- TODO confirm against the
		 * SSYRK wrapper's definition. */
		SSYRK("L", "N", size, size, 1.0f,
		      mat, size, 0.0f, test_mat, size);

		FPRINTF(stderr, "comparing results ...\n");
#ifdef PRINT_OUTPUT
		/* Dump the entries with i <= j; the others are printed as ".". */
		for (j = 0; j < size; j++) {
			for (i = 0; i < size; i++) {
				if (i <= j) {
					FPRINTF(stdout, "%2.2f\t", test_mat[j +i*size]);
				} else {
					FPRINTF(stdout, ".\t");
				}
			}
			FPRINTF(stdout, "\n");
		}
#endif

		/* Compare every entry with i <= j against the expected value
		 * 1/(1+i+j), plus size on the diagonal, and abort on mismatch. */
		for (j = 0; j < size; j++) {
			for (i = 0; i < size; i++) {
				if (i <= j) {
					float orig = (1.0f/(1.0f+i+j)) + ((i == j)?1.0f*size:0.0f);
					/* NOTE(review): unless abs is redefined elsewhere,
					 * this is the integer abs() from <stdlib.h>: the
					 * float difference is converted to int first, so any
					 * error below 1.0 becomes 0 and the 0.00001 threshold
					 * never triggers. fabsf() looks like what was
					 * intended -- confirm and fix. */
					float err = abs(test_mat[j +i*size] - orig);
					if (err > 0.00001) {
						FPRINTF(stderr, "Error[%u, %u] --> %2.2f != %2.2f (err %2.2f)\n", i, j, test_mat[j +i*size], orig, err);
						assert(0);
					}
				}
			}
		}
		/* NOTE(review): test_mat is not freed in the visible code. */
	}

	starpu_helper_cublas_shutdown();

	starpu_shutdown();

	return 0;
}