/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2012 Université de Bordeaux 1 * Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ /* * This exemplifies the use of the shadow filter: a source vector of NX * elements (plus 2*SHADOW wrap-around elements) is copied into a destination * vector of NX+PARTS*2*SHADOW elements, thus showing how shadowing shows up. */ #include #include /* Shadow width */ #define SHADOW 2 #define NX 30 #define PARTS 3 #define FPRINTF(ofile, fmt, args ...) 
do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0) void cpu_func(void *buffers[], void *cl_arg) { unsigned i; /* length of the shadowed source vector */ unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the shadowed source vector pointer */ int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); /* length of the destination vector */ unsigned n2 = STARPU_VECTOR_GET_NX(buffers[1]); /* local copy of the destination vector pointer */ int *val2 = (int *)STARPU_VECTOR_GET_PTR(buffers[1]); /* If things go right, sizes should match */ STARPU_ASSERT(n == n2); for (i = 0; i < n; i++) val2[i] = val[i]; } #ifdef STARPU_USE_CUDA void cuda_func(void *buffers[], void *cl_arg) { /* length of the shadowed source vector */ unsigned n = STARPU_VECTOR_GET_NX(buffers[0]); /* local copy of the shadowed source vector pointer */ int *val = (int *)STARPU_VECTOR_GET_PTR(buffers[0]); /* length of the destination vector */ unsigned n2 = STARPU_VECTOR_GET_NX(buffers[1]); /* local copy of the destination vector pointer */ int *val2 = (int *)STARPU_VECTOR_GET_PTR(buffers[1]); /* If things go right, sizes should match */ STARPU_ASSERT(n == n2); cudaMemcpy(val2, val, n*sizeof(*val), cudaMemcpyDeviceToDevice); cudaStreamSynchronize(starpu_cuda_get_local_stream()); } #endif int main(int argc, char **argv) { unsigned i, j; int vector[NX + 2*SHADOW]; int vector2[NX + PARTS*2*SHADOW]; starpu_data_handle_t handle, handle2; int factor=1; int ret; struct starpu_codelet cl = { .where = STARPU_CPU #ifdef STARPU_USE_CUDA |STARPU_CUDA #endif , .cpu_funcs = {cpu_func, NULL}, #ifdef STARPU_USE_CUDA .cuda_funcs = {cuda_func, NULL}, #endif .nbuffers = 2, .modes = {STARPU_R, STARPU_W} }; for(i=0 ; ihandles[0] = sub_handle; task->handles[1] = sub_handle2; task->cl = &cl; task->synchronous = 1; task->cl_arg = &factor; task->cl_arg_size = sizeof(factor); ret = starpu_task_submit(task); if (ret == -ENODEV) goto enodev; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } 
starpu_data_unpartition(handle, 0); starpu_data_unpartition(handle2, 0); starpu_data_unregister(handle); starpu_data_unregister(handle2); starpu_shutdown(); FPRINTF(stderr,"OUT Vector: "); for(i=0 ; i