| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186 | /* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010-2021  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. *//* * This examplifies how to use partitioning filters.  We here just split a 3D * matrix into 3D slices (along the X axis), and run a dumb kernel on them. */#include <starpu.h>#define NX    5#define NY    4#define NZ    3#define PARTS 2#define FPRINTF(ofile, fmt, ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ## __VA_ARGS__); }} while(0)extern void cpu_func(void *buffers[], void *cl_arg);#ifdef STARPU_USE_CUDAextern void cuda_func(void *buffers[], void *cl_arg);#endif#ifdef STARPU_USE_OPENCLextern void opencl_func(void *buffers[], void *cl_arg);#endifvoid print_block(int *block, int nx, int ny, int nz, unsigned ldy, unsigned ldz){        int i, j, k;        FPRINTF(stderr, "block=%p nx=%d ny=%d nz=%d ldy=%u ldz=%u\n", block, nx, ny, nz, ldy, ldz);        for(k=0 ; k<nz ; k++)	{                for(j=0 ; j<ny ; j++)		{                        for(i=0 ; i<nx ; i++)			{                                FPRINTF(stderr, "%2d ", block[(k*ldz)+(j*ldy)+i]);                        }                        FPRINTF(stderr,"\n");                }                FPRINTF(stderr,"\n");        }        FPRINTF(stderr,"\n");}void print_data(starpu_data_handle_t block_handle){	int *block = (int *)starpu_block_get_local_ptr(block_handle);	int nx = starpu_block_get_nx(block_handle);	int ny = starpu_block_get_ny(block_handle);	int nz = starpu_block_get_nz(block_handle);	unsigned ldy = starpu_block_get_local_ldy(block_handle);	unsigned ldz = starpu_block_get_local_ldz(block_handle);        print_block(block, nx, ny, nz, ldy, ldz);}#ifdef STARPU_USE_OPENCLstruct starpu_opencl_program opencl_program;#endifint main(void){        int *block,n=0;        int i, j, k;	int ret;        block = (int*)malloc(NX*NY*NZ*sizeof(block[0]));        assert(block);        for(k=0 ; k<NZ ; k++)	{                for(j=0 ; j<NY ; j++)		{                        for(i=0 ; i<NX ; i++)			{                                block[(k*NX*NY)+(j*NX)+i] = n++;                        }                }        }	starpu_data_handle_t handle;	struct starpu_codelet cl =	{                .cpu_funcs = {cpu_func},                .cpu_funcs_name = {"cpu_func"},#ifdef STARPU_USE_CUDA                .cuda_funcs = {cuda_func},		.cuda_flags = {STARPU_CUDA_ASYNC},#endif#ifdef STARPU_USE_OPENCL                .opencl_funcs = {opencl_func},		.opencl_flags = {STARPU_OPENCL_ASYNC},#endif		.nbuffers = 1,                .modes = {STARPU_RW},		.name = "block_scal"	};        ret = starpu_init(NULL);	if (ret == -ENODEV)		exit(77);	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");#ifdef STARPU_USE_OPENCL        ret = starpu_opencl_load_opencl_from_file("examples/filters/fblock_opencl_kernel.cl", &opencl_program, NULL);	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");#endif        /* Declare data to StarPU */        starpu_block_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)block, NX, NX*NY, NX, NY, NZ, sizeof(int));        FPRINTF(stderr, "IN  Block\n");        print_data(handle);        /* Partition the block in PARTS sub-blocks */	struct starpu_data_filter f =	{		.filter_func = starpu_block_filter_block,		.nchildren = PARTS	};        starpu_data_partition(handle, &f);        FPRINTF(stderr,"Nb of partitions : %d\n",starpu_data_get_nb_children(handle));        for(i=0 ; i<starpu_data_get_nb_children(handle) ; i++)        {                starpu_data_handle_t sblock = starpu_data_get_sub_data(handle, 1, i);                FPRINTF(stderr, "Sub block %d\n", i);                print_data(sblock);        }        /* Submit a task on each sub-block */        for(i=0 ; i<starpu_data_get_nb_children(handle) ; i++)        {                int multiplier=i;                struct starpu_task *task = starpu_task_create();                FPRINTF(stderr,"Dealing with sub-block %d\n", i);                task->cl = &cl;                task->synchronous = 1;                task->callback_func = NULL;                task->handles[0] = starpu_data_get_sub_data(handle, 1, i);                task->cl_arg = &multiplier;                task->cl_arg_size = sizeof(multiplier);                ret = starpu_task_submit(task);                if (ret)		{                        FPRINTF(stderr, "Error when submitting task\n");                        exit(ret);                }        }        /* Unpartition the data, unregister it from StarPU and shutdown */        starpu_data_unpartition(handle, STARPU_MAIN_RAM);        print_data(handle);        starpu_data_unregister(handle);#ifdef STARPU_USE_OPENCL        ret = starpu_opencl_unload_opencl(&opencl_program);	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_unload_opencl");#endif        /* Print result block */        FPRINTF(stderr, "OUT Block\n");        print_block(block, NX, NY, NZ, NX, NX*NY);	free(block);	starpu_shutdown();	return 0;}
 |