| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406 | 
							- /* StarPU --- Runtime system for heterogeneous multicore architectures.
 
-  *
 
-  * Copyright (C) 2010-2011,2013-2017                      Université de Bordeaux
 
-  * Copyright (C) 2011,2013,2015-2017                      CNRS
 
-  * Copyright (C) 2013                                     Inria
 
-  *
 
-  * StarPU is free software; you can redistribute it and/or modify
 
-  * it under the terms of the GNU Lesser General Public License as published by
 
-  * the Free Software Foundation; either version 2.1 of the License, or (at
 
-  * your option) any later version.
 
-  *
 
-  * StarPU is distributed in the hope that it will be useful, but
 
-  * WITHOUT ANY WARRANTY; without even the implied warranty of
 
-  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 
-  *
 
-  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
-  */
 
- #include "stencil.h"
 
- #include <math.h>
 
/* Manage block and tags allocation */

/* Array of nbz block descriptors, allocated by create_blocks_array() */
static struct block_description *blocks;

/* Domain dimensions, as given to create_blocks_array() */
static unsigned sizex;
static unsigned sizey;
static unsigned sizez;

/* Number of blocks along z */
static unsigned nbz;

/* z extent of each block (nbz entries), filled by compute_block_sizes() */
static unsigned *block_sizes_z;
 
- /*
 
-  *	Tags for various codelet completion
 
-  */
 
- /*
 
-  * common tag format:
 
-  */
 
- static starpu_tag_t tag_common(int z, int dir, int type)
 
- {
 
- 	return (((((starpu_tag_t)type) << 4) | ((dir+1)/2)) << 32)|(starpu_tag_t)z;
 
- }
 
- /* Completion of last update tasks */
 
- starpu_tag_t TAG_FINISH(int z)
 
- {
 
- 	z = (z + nbz)%nbz;
 
- 	starpu_tag_t tag = tag_common(z, 0, 1);
 
- 	return tag;
 
- }
 
- /* Completion of the save codelet for MPI send/recv */
 
- starpu_tag_t TAG_START(int z, int dir)
 
- {
 
- 	z = (z + nbz)%nbz;
 
- 	starpu_tag_t tag = tag_common(z, dir, 2);
 
- 	return tag;
 
- }
 
/*
 * common MPI tag format, from the low bits up:
 * buffer (4 bits), (1+dir)/2 (4 bits), z (12 bits), then iter.
 * No field is masked, so each value is expected to fit its slot.
 * iter is actually not needed for coherency, but it makes debugging easier
 */
static int mpi_tag_common(int z, int iter, int dir, int buffer)
{
	int tag = iter << 12;

	tag |= z;
	tag <<= 4;
	tag |= (1+dir)/2;
	tag <<= 4;
	tag |= buffer;

	return tag;
}
 
- int MPI_TAG0(int z, int iter, int dir)
 
- {
 
- 	z = (z + nbz)%nbz;
 
- 	int tag = mpi_tag_common(z, iter, dir, 0);
 
- 	return tag;
 
- }
 
- int MPI_TAG1(int z, int iter, int dir)
 
- {
 
- 	z = (z + nbz)%nbz;
 
- 	int tag = mpi_tag_common(z, iter, dir, 1);
 
- 	return tag;
 
- }
 
- /*
 
-  *	Block descriptors
 
-  */
 
- /* Compute the size of the different blocks */
 
- static void compute_block_sizes(void)
 
- {
 
- 	block_sizes_z = (unsigned *) malloc(nbz*sizeof(unsigned));
 
- 	STARPU_ASSERT(block_sizes_z);
 
- 	/* Perhaps the last chunk is smaller */
 
- 	unsigned default_block_size = (sizez+nbz-1)/nbz;
 
- 	unsigned remaining = sizez;
 
- 	unsigned b;
 
- 	for (b = 0; b < nbz; b++)
 
- 	{
 
- 		block_sizes_z[b] = MIN(default_block_size, remaining);
 
- 		remaining -= block_sizes_z[b];
 
- 	}
 
- 	STARPU_ASSERT(remaining == 0);
 
- }
 
- unsigned get_block_size(int bz)
 
- {
 
- 	return block_sizes_z[bz];
 
- }
 
- struct block_description *get_block_description(int z)
 
- {
 
- 	z = (z + nbz)%nbz;
 
- 	STARPU_ASSERT(&blocks[z]);
 
- 	return &blocks[z];
 
- }
 
- int get_block_mpi_node(int z)
 
- {
 
- 	z = (z + nbz)%nbz;
 
- 	return blocks[z].mpi_node;
 
- }
 
- void create_blocks_array(unsigned _sizex, unsigned _sizey, unsigned _sizez, unsigned _nbz)
 
- {
 
- 	/* Store the parameters */
 
- 	nbz = _nbz;
 
- 	sizex = _sizex;
 
- 	sizey = _sizey;
 
- 	sizez = _sizez;
 
- 	/* Create a grid of block descriptors */
 
- 	blocks = (struct block_description *) calloc(nbz, sizeof(struct block_description));
 
- 	STARPU_ASSERT(blocks);
 
- 	/* What is the size of the different blocks ? */
 
- 	compute_block_sizes();
 
- 	unsigned bz;
 
- 	for (bz = 0; bz < nbz; bz++)
 
- 	{
 
- 		struct block_description * block =
 
- 				get_block_description(bz);
 
- 		/* Which block is it ? */
 
- 		block->bz = bz;
 
- 		/* For simplicity, we store which are the neighbours blocks */
 
- 		block->boundary_blocks[B] = get_block_description((bz-1+nbz)%nbz);
 
- 		block->boundary_blocks[T] = get_block_description((bz+1)%nbz);
 
- 	}
 
- }
 
- void free_blocks_array()
 
- {
 
- 	free(blocks);
 
- 	free(block_sizes_z);
 
- }
 
- /*
 
-  *	Initialization of the blocks
 
-  */
 
- void assign_blocks_to_workers(int rank)
 
- {
 
- 	unsigned bz;
 
- 	/* NB: perhaps we could count a GPU as multiple workers */
 
- 	/* how many workers are there ? */
 
- 	/*unsigned nworkers = starpu_worker_get_count();*/
 
- 	/* how many blocks are on that MPI node ? */
 
- 	unsigned nblocks = 0;
 
- 	for (bz = 0; bz < nbz; bz++)
 
- 	{
 
- 		struct block_description *block =
 
- 				get_block_description(bz);
 
- 		if (block->mpi_node == rank)
 
- 			nblocks++;
 
- 	}
 
- 	/* how many blocks per worker ? */
 
- 	/*unsigned nblocks_per_worker = (nblocks + nworkers - 1)/nworkers;*/
 
- 	/* we now attribute up to nblocks_per_worker blocks per workers */
 
- 	unsigned attributed = 0;
 
- 	for (bz = 0; bz < nbz; bz++)
 
- 	{
 
- 		struct block_description *block =
 
- 				get_block_description(bz);
 
- 		if (block->mpi_node == rank)
 
- 		{
 
- 			unsigned workerid;
 
- 			/* Manage initial block distribution between CPU and GPU */
 
- 		#if 0
 
- 			#if 1
 
- 			/* GPUs then CPUs */
 
- 			if (attributed < 3*18)
 
- 				workerid = attributed / 18;
 
- 			else
 
- 				workerid = 3+ (attributed - 3*18) / 2;
 
- 			#else
 
- 			/* GPUs interleaved with CPUs */
 
- 			if ((attributed % 20) <= 1)
 
- 				workerid = 3 + attributed / 20;
 
- 			else if (attributed < 60)
 
- 				workerid = attributed / 20;
 
- 			else
 
- 				workerid = (attributed - 60)/2 + 6;
 
- 			#endif
 
- 		#else
 
- 			/* Only GPUS */
 
- 			workerid = (attributed / 21) % 3;
 
- 		#endif
 
- 			/*= attributed/nblocks_per_worker;*/
 
- 			block->preferred_worker = workerid;
 
- 			attributed++;
 
- 		}
 
- 	}
 
- }
 
- void assign_blocks_to_mpi_nodes(int world_size)
 
- {
 
- 	unsigned nzblocks_per_process = (nbz + world_size - 1) / world_size;
 
- 	unsigned bz;
 
- 	for (bz = 0; bz < nbz; bz++)
 
- 	{
 
- 		struct block_description *block =
 
- 				get_block_description(bz);
 
- 		block->mpi_node = bz / nzblocks_per_process;
 
- 	}
 
- }
 
- static size_t allocated = 0;
 
- static void allocate_block_on_node(starpu_data_handle_t *handleptr, unsigned bz, TYPE **ptr, unsigned nx, unsigned ny, unsigned nz)
 
- {
 
- 	int ret;
 
- 	size_t block_size = nx*ny*nz*sizeof(TYPE);
 
- 	/* Allocate memory */
 
- #if 1
 
- 	ret = starpu_malloc_flags((void **)ptr, block_size, STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
 
- 	STARPU_ASSERT(ret == 0);
 
- #else
 
- 	*ptr = malloc(block_size);
 
- 	STARPU_ASSERT(*ptr);
 
- #endif
 
- 	allocated += block_size;
 
- #ifndef STARPU_SIMGRID
 
- 	/* Fill the blocks with 0 */
 
- 	memset(*ptr, 0, block_size);
 
- #endif
 
- 	/* Register it to StarPU */
 
- 	starpu_block_data_register(handleptr, STARPU_MAIN_RAM, (uintptr_t)*ptr, nx, nx*ny, nx, ny, nz, sizeof(TYPE));
 
- 	starpu_data_set_coordinates(*handleptr, 1, bz);
 
- }
 
- static void free_block_on_node(starpu_data_handle_t handleptr, unsigned nx, unsigned ny, unsigned nz)
 
- {
 
- 	void *ptr = (void *) starpu_block_get_local_ptr(handleptr);
 
- 	size_t block_size = nx*ny*nz*sizeof(TYPE);
 
- 	starpu_data_unregister(handleptr);
 
- 	starpu_free_flags(ptr, block_size, STARPU_MALLOC_PINNED|STARPU_MALLOC_SIMULATION_FOLDED);
 
- }
 
- void display_memory_consumption(int rank)
 
- {
 
- 	FPRINTF(stderr, "%lu B of memory were allocated on node %d\n", (unsigned long) allocated, rank);
 
- }
 
- void allocate_memory_on_node(int rank)
 
- {
 
- 	unsigned bz;
 
- 	for (bz = 0; bz < nbz; bz++)
 
- 	{
 
- 		struct block_description *block = get_block_description(bz);
 
- 		int node = block->mpi_node;
 
- 		/* Main blocks */
 
- 		if (node == rank)
 
- 		{
 
- 			unsigned size_bz = block_sizes_z[bz];
 
- 			allocate_block_on_node(&block->layers_handle[0], bz, &block->layers[0],
 
- 						(sizex + 2*K), (sizey + 2*K), (size_bz + 2*K));
 
- #ifndef STARPU_SIMGRID
 
- #ifdef LIFE
 
- 			unsigned x, y, z;
 
- 			unsigned sum = 0;
 
- 			for (x = 0; x < sizex; x++)
 
- 				for (y = 0; y < sizey; y++)
 
- 					for (z = 0; z < size_bz; z++)
 
- 						/* Just random data */
 
- 						sum += block->layers[0][(K+x)+(K+y)*(sizex + 2*K)+(K+z)*(sizex+2*K)*(sizey+2*K)] = (int)((x/7.+y/13.+(bz*size_bz + z)/17.) * 10.) % 2;
 
- /*			printf("block %d starts with %d/%d alive\n", bz, sum, sizex*sizey*size_bz);*/
 
- #endif
 
- #endif
 
- 			allocate_block_on_node(&block->layers_handle[1], bz, &block->layers[1],
 
- 						(sizex + 2*K), (sizey + 2*K), (size_bz + 2*K));
 
- 		}
 
- 		/* Boundary blocks : Top */
 
- 		int top_node = block->boundary_blocks[T]->mpi_node;
 
- 		if ((node == rank) || (top_node == rank))
 
- 		{
 
- 			allocate_block_on_node(&block->boundaries_handle[T][0], bz, &block->boundaries[T][0],
 
- 						(sizex + 2*K), (sizey + 2*K), K);
 
- 			allocate_block_on_node(&block->boundaries_handle[T][1], bz, &block->boundaries[T][1],
 
- 						(sizex + 2*K), (sizey + 2*K), K);
 
- 		}
 
- 		/* Boundary blocks : Bottom */
 
- 		int bottom_node = block->boundary_blocks[B]->mpi_node;
 
- 		if ((node == rank) || (bottom_node == rank))
 
- 		{
 
- 			allocate_block_on_node(&block->boundaries_handle[B][0], bz, &block->boundaries[B][0],
 
- 						(sizex + 2*K), (sizey + 2*K), K);
 
- 			allocate_block_on_node(&block->boundaries_handle[B][1], bz, &block->boundaries[B][1],
 
- 						(sizex + 2*K), (sizey + 2*K), K);
 
- 		}
 
- 	}
 
- }
 
- void free_memory_on_node(int rank)
 
- {
 
- 	unsigned bz;
 
- 	for (bz = 0; bz < nbz; bz++)
 
- 	{
 
- 		struct block_description *block = get_block_description(bz);
 
- 		int node = block->mpi_node;
 
- 		/* Main blocks */
 
- 		if (node == rank)
 
- 		{
 
- 			free_block_on_node(block->layers_handle[0], (sizex + 2*K), (sizey + 2*K), K);
 
- 			free_block_on_node(block->layers_handle[1], (sizex + 2*K), (sizey + 2*K), K);
 
- 		}
 
- 		/* Boundary blocks : Top */
 
- 		int top_node = block->boundary_blocks[T]->mpi_node;
 
- 		if ((node == rank) || (top_node == rank))
 
- 		{
 
- 			free_block_on_node(block->boundaries_handle[T][0], (sizex + 2*K), (sizey + 2*K), K);
 
- 			free_block_on_node(block->boundaries_handle[T][1], (sizex + 2*K), (sizey + 2*K), K);
 
- 		}
 
- 		/* Boundary blocks : Bottom */
 
- 		int bottom_node = block->boundary_blocks[B]->mpi_node;
 
- 		if ((node == rank) || (bottom_node == rank))
 
- 		{
 
- 			free_block_on_node(block->boundaries_handle[B][0], (sizex + 2*K), (sizey + 2*K), K);
 
- 			free_block_on_node(block->boundaries_handle[B][1], (sizex + 2*K), (sizey + 2*K), K);
 
- 		}
 
- 	}
 
- }
 
- /* check how many cells are alive */
 
- void check(int rank)
 
- {
 
- 	unsigned bz;
 
- 	for (bz = 0; bz < nbz; bz++)
 
- 	{
 
- 		struct block_description *block = get_block_description(bz);
 
- 		int node = block->mpi_node;
 
- 		/* Main blocks */
 
- 		if (node == rank)
 
- 		{
 
- #ifdef LIFE
 
- 			unsigned size_bz = block_sizes_z[bz];
 
- 			unsigned x, y, z;
 
- 			unsigned sum = 0;
 
- 			for (x = 0; x < sizex; x++)
 
- 				for (y = 0; y < sizey; y++)
 
- 					for (z = 0; z < size_bz; z++)
 
- 						sum += block->layers[0][(K+x)+(K+y)*(sizex + 2*K)+(K+z)*(sizex+2*K)*(sizey+2*K)];
 
- 			printf("block %u got %u/%u alive\n", bz, sum, sizex*sizey*size_bz);
 
- #endif
 
- 		}
 
- 	}
 
- }
 
 
  |