123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339 |
- //---------------------------------------------------------------------
- //
- // Copyright 2010 Intel Corporation
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- //
- //---------------------------------------------------------------------
- #include "header.h"
- #include "mpinpb.h"
- void copy_faces() {
- //---------------------------------------------------------------------
- // copy_faces: pack two planes of u per face into out_buffer, exchange the six
- // face buffers with the neighbors via RCCE_send/RCCE_recv, unpack in_buffer into
- // the overlap planes of u (indices -2,-1 and size,size+1), then call compute_rhs().
- // This function copies the face values of a variable defined on a set
- // of cells to the overlap locations of the adjacent sets of cells.
- // Because a set of cells interfaces in each direction with exactly one
- // other set, we only need to fill six different buffers. We could try to
- // overlap communication with computation, by computing
- // some internal values while communicating boundary values, but this
- // adds so much overhead that it's not clearly useful.
- //---------------------------------------------------------------------
- int i, j, k, c, m, p0, p1, phase, // i,j,k: grid indices; c: cell; m: component of u; p0..p5: per-buffer running offsets
- p2, p3, p4, p5, b_size[6], ss[6], // b_size: buffer length in doubles; ss: send start offset; sr: receive start offset
- sr[6], error; // NOTE(review): error is never read or written in this function
- #define b_size(m) b_size[m] // function-like macros so the local arrays can be indexed as b_size(m), ss(m), sr(m)
- #define ss(m) ss[m]
- #define sr(m) sr[m]
- //---------------------------------------------------------------------
- // exit immediately if there are no faces to be copied: with a single node (no_nodes == 1) only compute_rhs() is run
- //---------------------------------------------------------------------
- if (no_nodes == 1) {
- compute_rhs();
- return;
- }
- ss(0) = start_send_east; // direction index used throughout: 0=east, 1=west, 2=north, 3=south, 4=top, 5=bottom
- ss(1) = start_send_west; // ss(*) are start offsets into out_buffer
- ss(2) = start_send_north;
- ss(3) = start_send_south;
- ss(4) = start_send_top;
- ss(5) = start_send_bottom;
- sr(0) = start_recv_east; // sr(*) are start offsets into in_buffer
- sr(1) = start_recv_west;
- sr(2) = start_recv_north;
- sr(3) = start_recv_south;
- sr(4) = start_recv_top;
- sr(5) = start_recv_bottom;
- b_size(0) = east_size ; // lengths are counted in doubles; they are multiplied by sizeof(double) when sent or received
- b_size(1) = west_size ;
- b_size(2) = north_size ;
- b_size(3) = south_size ;
- b_size(4) = top_size ;
- b_size(5) = bottom_size ;
- //---------------------------------------------------------------------
- // because the difference stencil for the diagonalized scheme is
- // orthogonal, we do not have to perform the staged copying of faces,
- // but can send all face information simultaneously to the neighboring
- // cells in all directions
- //---------------------------------------------------------------------
- p0 = 0; // p0..p5 count the values written so far to the east, west, north, south, top and bottom send buffers
- p1 = 0;
- p2 = 0;
- p3 = 0;
- p4 = 0;
- p5 = 0;
- for (c = 1; c <= ncells; c++) { // cells are numbered 1..ncells; the offsets keep growing across cells
- //---------------------------------------------------------------------
- // fill the buffer to be sent to eastern neighbors (i-dir)
- //---------------------------------------------------------------------
- if (cell_coord(1,c) != ncells) { // skipped when the cell's i-coordinate is ncells; presumably no eastern neighbor there -- confirm
- for (k = 0; k <= cell_size(3,c)-1; k++) {
- for (j = 0; j <= cell_size(2,c)-1; j++) {
- for (i = cell_size(1,c)-2; i <= cell_size(1,c)-1; i++) { // the last two i-planes of the cell
- for (m = 1; m <= 5; m++) { // all five components; m varies fastest in the buffer, then i, j, k
- out_buffer(ss(0)+p0) = u(m,i,j,k,c);
- p0 = p0 + 1;
- }
- }
- }
- }
- }
- //---------------------------------------------------------------------
- // fill the buffer to be sent to western neighbors
- //---------------------------------------------------------------------
- if (cell_coord(1,c) != 1) { // skipped when the cell's i-coordinate is 1
- for (k = 0; k <= cell_size(3,c)-1; k++) {
- for (j = 0; j <= cell_size(2,c)-1; j++) {
- for (i = 0; i <= 1; i++) { // the first two i-planes of the cell
- for (m = 1; m <= 5; m++) {
- out_buffer(ss(1)+p1) = u(m,i,j,k,c);
- p1 = p1 + 1;
- }
- }
- }
- }
- }
- //---------------------------------------------------------------------
- // fill the buffer to be sent to northern neighbors (j_dir)
- //---------------------------------------------------------------------
- if (cell_coord(2,c) != ncells) { // skipped when the cell's j-coordinate is ncells
- for (k = 0; k <= cell_size(3,c)-1; k++) {
- for (j = cell_size(2,c)-2; j <= cell_size(2,c)-1; j++) { // the last two j-planes of the cell
- for (i = 0; i <= cell_size(1,c)-1; i++) {
- for (m = 1; m <= 5; m++) {
- out_buffer(ss(2)+p2) = u(m,i,j,k,c);
- p2 = p2 + 1;
- }
- }
- }
- }
- }
- //---------------------------------------------------------------------
- // fill the buffer to be sent to southern neighbors
- //---------------------------------------------------------------------
- if (cell_coord(2,c)!= 1) { // skipped when the cell's j-coordinate is 1
- for (k = 0; k <= cell_size(3,c)-1; k++) {
- for (j = 0; j <= 1; j++) { // the first two j-planes of the cell
- for (i = 0; i <= cell_size(1,c)-1; i++) {
- for (m = 1; m <= 5; m++) {
- out_buffer(ss(3)+p3) = u(m,i,j,k,c);
- p3 = p3 + 1;
- }
- }
- }
- }
- }
- //---------------------------------------------------------------------
- // fill the buffer to be sent to top neighbors (k-dir)
- //---------------------------------------------------------------------
- if (cell_coord(3,c) != ncells) { // skipped when the cell's k-coordinate is ncells
- for (k = cell_size(3,c)-2; k <= cell_size(3,c)-1; k++) { // the last two k-planes of the cell
- for (j = 0; j <= cell_size(2,c)-1; j++) {
- for (i = 0; i <= cell_size(1,c)-1; i++) {
- for (m = 1; m <= 5; m++) {
- out_buffer(ss(4)+p4) = u(m,i,j,k,c);
- p4 = p4 + 1;
- }
- }
- }
- }
- }
- //---------------------------------------------------------------------
- // fill the buffer to be sent to bottom neighbors
- //---------------------------------------------------------------------
- if (cell_coord(3,c)!= 1) { // skipped when the cell's k-coordinate is 1
- for (k = 0; k <= 1; k++) { // the first two k-planes of the cell
- for (j = 0; j <= cell_size(2,c)-1; j++) {
- for (i = 0; i <= cell_size(1,c)-1; i++) {
- for (m = 1; m <= 5; m++) {
- out_buffer(ss(5)+p5) = u(m,i,j,k,c);
- p5 = p5 + 1;
- }
- }
- }
- }
- }
- //---------------------------------------------------------------------
- // end of the packing loop over cells
- //---------------------------------------------------------------------
- }
- for (phase = 0; phase < 3; phase++) { // each transfer below fires only in the phase equal to its send_color/recv_color entry
- if (send_color[WESTDIR]==phase) { // west-going: west send buffer -> predecessor(1)
- RCCE_send((char*)(&out_buffer(ss(1))), b_size(1)*sizeof(double), predecessor(1)); // NOTE(review): RCCE_send/RCCE_recv return values are not checked anywhere in this loop
- }
- if (recv_color[WESTDIR]==phase) { // west-going: data from successor(1) lands in the east receive area
- RCCE_recv((char*)(&in_buffer(sr(0))), b_size(0)*sizeof(double), successor(1));
- }
- if (send_color[EASTDIR]==phase) { // east-going: east send buffer -> successor(1)
- RCCE_send((char*)(&out_buffer(ss(0))), b_size(0)*sizeof(double), successor(1));
- }
- if (recv_color[EASTDIR]==phase) { // east-going: data from predecessor(1) lands in the west receive area
- RCCE_recv((char*)(&in_buffer(sr(1))), b_size(1)*sizeof(double), predecessor(1));
- }
- if (send_color[SOUTHDIR]==phase) { // south-going: south send buffer -> predecessor(2)
- RCCE_send((char*)(&out_buffer(ss(3))), b_size(3)*sizeof(double), predecessor(2));
- }
- if (recv_color[SOUTHDIR]==phase) { // south-going: data from successor(2) lands in the north receive area
- RCCE_recv((char*)(&in_buffer(sr(2))), b_size(2)*sizeof(double), successor(2));
- }
- if (send_color[NORTHDIR]==phase) { // north-going: north send buffer -> successor(2)
- RCCE_send((char*)(&out_buffer(ss(2))), b_size(2)*sizeof(double),successor(2));
- }
- if (recv_color[NORTHDIR]==phase) { // north-going: data from predecessor(2) lands in the south receive area
- RCCE_recv((char*)(&in_buffer(sr(3))), b_size(3)*sizeof(double), predecessor(2));
- }
- if (send_color[BOTTOMDIR]==phase) { // bottom-going: bottom send buffer -> predecessor(3)
- RCCE_send((char*)(&out_buffer(ss(5))), b_size(5)*sizeof(double),predecessor(3));
- }
- if (recv_color[BOTTOMDIR]==phase) { // bottom-going: data from successor(3) lands in the top receive area
- RCCE_recv((char*)(&in_buffer(sr(4))), b_size(4)*sizeof(double), successor(3));
- }
- if (send_color[TOPDIR]==phase) { // top-going: top send buffer -> successor(3)
- RCCE_send((char*)(&out_buffer(ss(4))), b_size(4)*sizeof(double),successor(3));
- }
- if (recv_color[TOPDIR]==phase) { // top-going: data from predecessor(3) lands in the bottom receive area
- RCCE_recv((char*)(&in_buffer(sr(5))), b_size(5)*sizeof(double), predecessor(3));
- }
- }
- //---------------------------------------------------------------------
- // unpack the data that has just been received; loops run in the same k, j, i, m order as the packing loops above
- //---------------------------------------------------------------------
- p0 = 0; // counters are reused as read offsets; note the pairing differs from the packing stage:
- p1 = 0; // p0 walks the west area sr(1), p1 the east area sr(0),
- p2 = 0; // p2 walks the south area sr(3), p3 the north area sr(2),
- p3 = 0;
- p4 = 0; // p4 walks the bottom area sr(5), p5 the top area sr(4)
- p5 = 0;
- for (c = 1; c <= ncells; c++) {
- if (cell_coord(1,c) != 1) { // west overlap: planes i = -2, -1 are filled from the west receive area
- for (k = 0; k <= cell_size(3,c)-1; k++) {
- for (j = 0; j <= cell_size(2,c)-1; j++) {
- for (i = -2; i <= -1; i++) {
- for (m = 1; m <= 5; m++) {
- u(m,i,j,k,c) = in_buffer(sr(1)+p0);
- p0 = p0 + 1;
- }
- }
- }
- }
- }
- if (cell_coord(1,c) != ncells) { // east overlap: planes i = cell_size(1,c), cell_size(1,c)+1 from the east receive area
- for (k = 0; k <= cell_size(3,c)-1; k++) {
- for (j = 0; j <= cell_size(2,c)-1; j++) {
- for (i = cell_size(1,c); i <= cell_size(1,c)+1; i++) {
- for (m = 1; m <= 5; m++) {
- u(m,i,j,k,c) = in_buffer(sr(0)+p1);
- p1 = p1 + 1;
- }
- }
- }
- }
- }
-
- if (cell_coord(2,c) != 1) { // south overlap: planes j = -2, -1 are filled from the south receive area
- for (k = 0; k <= cell_size(3,c)-1; k++) {
- for (j = -2; j <= -1; j++) {
- for (i = 0; i <= cell_size(1,c)-1; i++) {
- for (m = 1; m <= 5; m++) {
- u(m,i,j,k,c) = in_buffer(sr(3)+p2);
- p2 = p2 + 1;
- }
- }
- }
- }
- }
-
- if (cell_coord(2,c) != ncells) { // north overlap: planes j = cell_size(2,c), cell_size(2,c)+1 from the north receive area
- for (k = 0; k <= cell_size(3,c)-1; k++) {
- for (j = cell_size(2,c); j <= cell_size(2,c)+1; j++) {
- for (i = 0; i <= cell_size(1,c)-1; i++) {
- for (m = 1; m <= 5; m++) {
- u(m,i,j,k,c) = in_buffer(sr(2)+p3);
- p3 = p3 + 1;
- }
- }
- }
- }
- }
- if (cell_coord(3,c) != 1) { // bottom overlap: planes k = -2, -1 are filled from the bottom receive area
- for (k = -2; k <= -1; k++) {
- for (j = 0; j <= cell_size(2,c)-1; j++) {
- for (i = 0; i <= cell_size(1,c)-1; i++) {
- for (m = 1; m <= 5; m++) {
- u(m,i,j,k,c) = in_buffer(sr(5)+p4);
- p4 = p4 + 1;
- }
- }
- }
- }
- }
- if (cell_coord(3,c) != ncells) { // top overlap: planes k = cell_size(3,c), cell_size(3,c)+1 from the top receive area
- for (k = cell_size(3,c); k <= cell_size(3,c)+1; k++) {
- for (j = 0; j <= cell_size(2,c)-1; j++) {
- for (i = 0; i <= cell_size(1,c)-1; i++) {
- for (m = 1; m <= 5; m++) {
- u(m,i,j,k,c) = in_buffer(sr(4)+p5);
- p5 = p5 + 1;
- }
- }
- }
- }
- }
- //---------------------------------------------------------------------
- // end of the unpacking loop over cells
- //---------------------------------------------------------------------
- }
- //---------------------------------------------------------------------
- // do the rest of the rhs that uses the copied face values
- //---------------------------------------------------------------------
- compute_rhs();
- return;
- }
|