1//--------------------------------------------------------------------- 2//--------------------------------------------------------------------- 3#include "header.h" 4#include "mpinpb.h" 5 6void copy_faces() { 7 8//--------------------------------------------------------------------- 9//--------------------------------------------------------------------- 10 11//--------------------------------------------------------------------- 12// 13// This function copies the face values of a variable defined on a set 14// of cells to the overlap locations of the adjacent sets of cells. 15// Because a set of cells interfaces in each direction with exactly one 16// other set, we only need to fill six different buffers. We could try to 17// overlap communication with computation, by computing 18// some internal values while communicating boundary values, but this 19// adds so much overhead that it's not clearly useful. 20//--------------------------------------------------------------------- 21 22 int i, j, k, c, m, p0, p1, phase, 23 p2, p3, p4, p5, b_size[6], ss[6], 24 sr[6], error; 25 26#define b_size(m) b_size[m] 27#define ss(m) ss[m] 28#define sr(m) sr[m] 29 30//--------------------------------------------------------------------- 31// exit immediately if there are no faces to be copied 32//--------------------------------------------------------------------- 33 if (no_nodes == 1) { 34 compute_rhs(); 35 return; 36 } 37 38 ss(0) = start_send_east; 39 ss(1) = start_send_west; 40 ss(2) = start_send_north; 41 ss(3) = start_send_south; 42 ss(4) = start_send_top; 43 ss(5) = start_send_bottom; 44 45 sr(0) = start_recv_east; 46 sr(1) = start_recv_west; 47 sr(2) = start_recv_north; 48 sr(3) = start_recv_south; 49 sr(4) = start_recv_top; 50 sr(5) = start_recv_bottom; 51 52 b_size(0) = east_size ; 53 b_size(1) = west_size ; 54 b_size(2) = north_size ; 55 b_size(3) = south_size ; 56 b_size(4) = top_size ; 57 b_size(5) = bottom_size ; 58 59//--------------------------------------------------------------------- 60// because the difference stencil for the diagonalized scheme is 61// orthogonal, we do not have to perform the staged copying of faces, 62// but can send all face information simultaneously to the neighboring 63// cells in all directions 64//--------------------------------------------------------------------- 65 p0 = 0; 66 p1 = 0; 67 p2 = 0; 68 p3 = 0; 69 p4 = 0; 70 p5 = 0; 71 72 for (c = 1; c <= ncells; c++) { 73 74//--------------------------------------------------------------------- 75// fill the buffer to be sent to eastern neighbors (i-dir) 76//--------------------------------------------------------------------- 77 if (cell_coord(1,c) != ncells) { 78 for (k = 0; k <= cell_size(3,c)-1; k++) { 79 for (j = 0; j <= cell_size(2,c)-1; j++) { 80 for (i = cell_size(1,c)-2; i <= cell_size(1,c)-1; i++) { 81 for (m = 1; m <= 5; m++) { 82 out_buffer(ss(0)+p0) = u(m,i,j,k,c); 83 p0 = p0 + 1; 84 } 85 } 86 } 87 } 88 } 89 90//--------------------------------------------------------------------- 91// fill the buffer to be sent to western neighbors 92//--------------------------------------------------------------------- 93 if (cell_coord(1,c) != 1) { 94 for (k = 0; k <= cell_size(3,c)-1; k++) { 95 for (j = 0; j <= cell_size(2,c)-1; j++) { 96 for (i = 0; i <= 1; i++) { 97 for (m = 1; m <= 5; m++) { 98 out_buffer(ss(1)+p1) = u(m,i,j,k,c); 99 p1 = p1 + 1; 100 } 101 } 102 } 103 } 104 105 } 106 107//--------------------------------------------------------------------- 108// fill the buffer to be sent to northern neighbors (j_dir) 109//--------------------------------------------------------------------- 110 if (cell_coord(2,c) != ncells) { 111 for (k = 0; k <= cell_size(3,c)-1; k++) { 112 for (j = cell_size(2,c)-2; j <= cell_size(2,c)-1; j++) { 113 for (i = 0; i <= cell_size(1,c)-1; i++) { 114 for (m = 1; m <= 5; m++) { 115 out_buffer(ss(2)+p2) = u(m,i,j,k,c); 116 p2 = p2 + 1; 117 } 118 } 119 } 120 } 121 } 122 123//--------------------------------------------------------------------- 124// fill the buffer to be sent to southern neighbors 125//--------------------------------------------------------------------- 126 if (cell_coord(2,c)!= 1) { 127 for (k = 0; k <= cell_size(3,c)-1; k++) { 128 for (j = 0; j <= 1; j++) { 129 for (i = 0; i <= cell_size(1,c)-1; i++) { 130 for (m = 1; m <= 5; m++) { 131 out_buffer(ss(3)+p3) = u(m,i,j,k,c); 132 p3 = p3 + 1; 133 } 134 } 135 } 136 } 137 } 138 139//--------------------------------------------------------------------- 140// fill the buffer to be sent to top neighbors (k-dir) 141//--------------------------------------------------------------------- 142 if (cell_coord(3,c) != ncells) { 143 for (k = cell_size(3,c)-2; k <= cell_size(3,c)-1; k++) { 144 for (j = 0; j <= cell_size(2,c)-1; j++) { 145 for (i = 0; i <= cell_size(1,c)-1; i++) { 146 for (m = 1; m <= 5; m++) { 147 out_buffer(ss(4)+p4) = u(m,i,j,k,c); 148 p4 = p4 + 1; 149 } 150 } 151 } 152 } 153 } 154 155//--------------------------------------------------------------------- 156// fill the buffer to be sent to bottom neighbors 157//--------------------------------------------------------------------- 158 if (cell_coord(3,c)!= 1) { 159 for (k = 0; k <= 1; k++) { 160 for (j = 0; j <= cell_size(2,c)-1; j++) { 161 for (i = 0; i <= cell_size(1,c)-1; i++) { 162 for (m = 1; m <= 5; m++) { 163 out_buffer(ss(5)+p5) = u(m,i,j,k,c); 164 p5 = p5 + 1; 165 } 166 } 167 } 168 } 169 } 170 171//--------------------------------------------------------------------- 172// cell loop 173//--------------------------------------------------------------------- 174 } 175 176 for (phase = 0; phase < 3; phase++) { 177 178 if (send_color[WESTDIR]==phase) { 179 RCCE_send((char*)(&out_buffer(ss(1))), b_size(1)*sizeof(double), predecessor(1)); 180 } 181 if (recv_color[WESTDIR]==phase) { 182 RCCE_recv((char*)(&in_buffer(sr(0))), b_size(0)*sizeof(double), successor(1)); 183 } 184 185 if (send_color[EASTDIR]==phase) { 186 RCCE_send((char*)(&out_buffer(ss(0))), b_size(0)*sizeof(double), successor(1)); 187 } 188 if (recv_color[EASTDIR]==phase) { 189 RCCE_recv((char*)(&in_buffer(sr(1))), b_size(1)*sizeof(double), predecessor(1)); 190 } 191 192 if (send_color[SOUTHDIR]==phase) { 193 RCCE_send((char*)(&out_buffer(ss(3))), b_size(3)*sizeof(double), predecessor(2)); 194 } 195 if (recv_color[SOUTHDIR]==phase) { 196 RCCE_recv((char*)(&in_buffer(sr(2))), b_size(2)*sizeof(double), successor(2)); 197 } 198 199 if (send_color[NORTHDIR]==phase) { 200 RCCE_send((char*)(&out_buffer(ss(2))), b_size(2)*sizeof(double),successor(2)); 201 } 202 if (recv_color[NORTHDIR]==phase) { 203 RCCE_recv((char*)(&in_buffer(sr(3))), b_size(3)*sizeof(double), predecessor(2)); 204 } 205 206 if (send_color[BOTTOMDIR]==phase) { 207 RCCE_send((char*)(&out_buffer(ss(5))), b_size(5)*sizeof(double),predecessor(3)); 208 } 209 if (recv_color[BOTTOMDIR]==phase) { 210 RCCE_recv((char*)(&in_buffer(sr(4))), b_size(4)*sizeof(double), successor(3)); 211 } 212 213 if (send_color[TOPDIR]==phase) { 214 RCCE_send((char*)(&out_buffer(ss(4))), b_size(4)*sizeof(double),successor(3)); 215 } 216 if (recv_color[TOPDIR]==phase) { 217 RCCE_recv((char*)(&in_buffer(sr(5))), b_size(5)*sizeof(double), predecessor(3)); 218 } 219 } 220 221//--------------------------------------------------------------------- 222// unpack the data that has just been received; 223//--------------------------------------------------------------------- 224 p0 = 0; 225 p1 = 0; 226 p2 = 0; 227 p3 = 0; 228 p4 = 0; 229 p5 = 0; 230 231 for (c = 1; c <= ncells; c++) { 232 233 if (cell_coord(1,c) != 1) { 234 for (k = 0; k <= cell_size(3,c)-1; k++) { 235 for (j = 0; j <= cell_size(2,c)-1; j++) { 236 for (i = -2; i <= -1; i++) { 237 for (m = 1; m <= 5; m++) { 238 u(m,i,j,k,c) = in_buffer(sr(1)+p0); 239 p0 = p0 + 1; 240 } 241 } 242 } 243 } 244 } 245 246 if (cell_coord(1,c) != ncells) { 247 for (k = 0; k <= cell_size(3,c)-1; k++) { 248 for (j = 0; j <= cell_size(2,c)-1; j++) { 249 for (i = cell_size(1,c); i <= cell_size(1,c)+1; i++) { 250 for (m = 1; m <= 5; m++) { 251 u(m,i,j,k,c) = in_buffer(sr(0)+p1); 252 p1 = p1 + 1; 253 } 254 } 255 } 256 } 257 } 258 259 if (cell_coord(2,c) != 1) { 260 for (k = 0; k <= cell_size(3,c)-1; k++) { 261 for (j = -2; j <= -1; j++) { 262 for (i = 0; i <= cell_size(1,c)-1; i++) { 263 for (m = 1; m <= 5; m++) { 264 u(m,i,j,k,c) = in_buffer(sr(3)+p2); 265 p2 = p2 + 1; 266 } 267 } 268 } 269 } 270 271 } 272 273 if (cell_coord(2,c) != ncells) { 274 for (k = 0; k <= cell_size(3,c)-1; k++) { 275 for (j = cell_size(2,c); j <= cell_size(2,c)+1; j++) { 276 for (i = 0; i <= cell_size(1,c)-1; i++) { 277 for (m = 1; m <= 5; m++) { 278 u(m,i,j,k,c) = in_buffer(sr(2)+p3); 279 p3 = p3 + 1; 280 } 281 } 282 } 283 } 284 } 285 286 if (cell_coord(3,c) != 1) { 287 for (k = -2; k <= -1; k++) { 288 for (j = 0; j <= cell_size(2,c)-1; j++) { 289 for (i = 0; i <= cell_size(1,c)-1; i++) { 290 for (m = 1; m <= 5; m++) { 291 u(m,i,j,k,c) = in_buffer(sr(5)+p4); 292 p4 = p4 + 1; 293 } 294 } 295 } 296 } 297 } 298 299 if (cell_coord(3,c) != ncells) { 300 for (k = cell_size(3,c); k <= cell_size(3,c)+1; k++) { 301 for (j = 0; j <= cell_size(2,c)-1; j++) { 302 for (i = 0; i <= cell_size(1,c)-1; i++) { 303 for (m = 1; m <= 5; m++) { 304 u(m,i,j,k,c) = in_buffer(sr(4)+p5); 305 p5 = p5 + 1; 306 } 307 } 308 } 309 } 310 } 311 312//--------------------------------------------------------------------- 313// cells loop 314//--------------------------------------------------------------------- 315 } 316 317//--------------------------------------------------------------------- 318// do the rest of the rhs that uses the copied face values 319//--------------------------------------------------------------------- 320 compute_rhs(); 321 322 return; 323} 324