/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012-2015,2017 CNRS * Copyright (C) 2013 Inria * Copyright (C) 2012-2017,2019 Université de Bordeaux * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #ifdef STARPU_USE_CUDA # include #endif /* * Compare the efficiency of matrix and vector interfaces */ #ifdef STARPU_QUICK_CHECK #define LOOPS 5 #elif !defined(STARPU_LONG_CHECK) #define LOOPS 30 #else #define LOOPS 100 #endif void vector_cpu_func(void *descr[], void *cl_arg) { (void)cl_arg; STARPU_SKIP_IF_VALGRIND; float *matrix = (float *)STARPU_VECTOR_GET_PTR(descr[0]); int nx = STARPU_VECTOR_GET_NX(descr[0]); int i; float sum=0; for(i=0 ; i mean=%7f != %7f\n", nx, matrix[0], mean); ret = EXIT_FAILURE; } end: if (ret == -ENODEV) fprintf(stderr, "# Uh, ENODEV?!"); starpu_free(matrix); starpu_task_wait_for_all(); return ret; } #define NX_MIN 1024 #define NX_MAX 1024*1024 static int check_size_on_device(uint32_t where, char *device_name) { int nx, ret; struct starpu_codelet vector_codelet; struct starpu_codelet matrix_codelet; fprintf(stderr, "# Device: %s\n", device_name); fprintf(stderr, "# nx vector_timing matrix_timing\n"); starpu_codelet_init(&vector_codelet); vector_codelet.modes[0] = STARPU_RW; vector_codelet.nbuffers = 1; if (where == STARPU_CPU) vector_codelet.cpu_funcs[0] = vector_cpu_func; #ifdef STARPU_USE_CUDA if (where == STARPU_CUDA) { vector_codelet.cuda_funcs[0] = vector_cuda_func; vector_codelet.cuda_flags[0] = STARPU_CUDA_ASYNC; } #endif // if (where == STARPU_OPENCL) vector_codelet.opencl_funcs[0] = vector_opencl_func; starpu_codelet_init(&matrix_codelet); matrix_codelet.modes[0] = STARPU_RW; matrix_codelet.nbuffers = 1; if (where == STARPU_CPU) matrix_codelet.cpu_funcs[0] = matrix_cpu_func; #ifdef STARPU_USE_CUDA if (where == STARPU_CUDA) { matrix_codelet.cuda_funcs[0] = matrix_cuda_func; matrix_codelet.cuda_flags[0] = STARPU_CUDA_ASYNC; } #endif // if (where == STARPU_OPENCL) matrix_codelet.opencl_funcs[0] = matrix_opencl_func; for(nx=NX_MIN ; nx<=NX_MAX ; nx*=2) { ret = check_size(nx, &vector_codelet, &matrix_codelet, device_name); if (ret != EXIT_SUCCESS) break; } return ret; } int main(void) { int ret; unsigned devices; #ifdef STARPU_USE_CUDA int cublas_version; #endif ret = starpu_init(NULL); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); devices = starpu_cpu_worker_get_count(); if (devices) { ret = check_size_on_device(STARPU_CPU, "STARPU_CPU"); if (ret) goto error; } #ifdef STARPU_USE_CUDA devices = starpu_cuda_worker_get_count(); if (devices) { cublasHandle_t handle; cublasCreate(&handle); cublasGetVersion(handle, &cublas_version); cublasDestroy(handle); if (cublas_version >= 7050) { starpu_cublas_init(); ret = check_size_on_device(STARPU_CUDA, "STARPU_CUDA"); if (ret) goto error; starpu_cublas_shutdown(); } } #endif #if 0 devices = starpu_opencl_worker_get_count(); if (devices) { ret = check_size_on_device(STARPU_OPENCL, "STARPU_OPENCL"); if (ret) goto error; } #endif error: if (ret == -ENODEV) ret=STARPU_TEST_SKIPPED; starpu_shutdown(); STARPU_RETURN(ret); }