1//--------------------------------------------------------------------- 2//--------------------------------------------------------------------- 3#include "header.h" 4#include "mpinpb.h" 5#include "work_lhs.h" 6 7extern void y_sendrecv_solve(int c, int cprev); 8extern void y_sendrecv_back(int c, int cprev); 9extern void y_backsubstitute(int first, int last, int c); 10extern void y_solve_cell(int first, int last, int c); 11 12void y_solve() { 13 14//--------------------------------------------------------------------- 15//--------------------------------------------------------------------- 16 17//--------------------------------------------------------------------- 18// Performs line solves in Y direction by first factoring 19// the block-tridiagonal matrix into an upper triangular matrix, 20// and then performing back substitution to solve for the unknow 21// vectors of each line. 22// 23// Make sure we treat elements zero to cell_size in the direction 24// of the sweep. 25//--------------------------------------------------------------------- 26 27 int c, cprev, stage, first, last, error; 28 29//--------------------------------------------------------------------- 30// in our terminology stage is the number of the cell in the y-direction 31// i.e. stage = 1 means the start of the line stage=ncells means end 32//--------------------------------------------------------------------- 33 for (stage = 1; stage <= ncells; stage++) { 34 c = slice(2,stage); 35//--------------------------------------------------------------------- 36// set last-cell flag 37//--------------------------------------------------------------------- 38 first = (stage == 1); 39 last = (stage == ncells); 40 41 if (stage >1) { 42 cprev = slice(2,stage-1); 43 y_sendrecv_solve(c, cprev); 44 } 45 y_solve_cell(first,last,c); 46 } 47 48//--------------------------------------------------------------------- 49// now perform backsubstitution in reverse direction 50//--------------------------------------------------------------------- 51 for (stage = ncells; stage >= 1; stage--) { 52 c = slice(2,stage); 53 first = (stage == 1); 54 last = (stage == ncells); 55 56 if (stage <ncells) { 57 cprev = slice(2,stage+1); 58 y_sendrecv_back(c, cprev); 59 } 60 61 y_backsubstitute(first,last,c); 62 } 63 64 return; 65} 66 67 68void y_sendrecv_solve(int c, int cprev) { 69 70//--------------------------------------------------------------------- 71// pack up and send C'(jend) and rhs'(jend) for 72// all i and k 73//--------------------------------------------------------------------- 74 75 int i,k,m,n,jsize,ptr,jstart; 76 int phase; 77 int error,buffer_size; 78 79 jsize = cell_size(2,cprev)-1; 80 buffer_size=MAX_CELL_DIM*MAX_CELL_DIM* 81 (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE); 82 83//--------------------------------------------------------------------- 84// pack up buffer 85//--------------------------------------------------------------------- 86 ptr = 0; 87 for (k = 0; k <= KMAX-1; k++) { 88 for (i = 0; i <= IMAX-1; i++) { 89 for (m = 1; m <= BLOCK_SIZE; m++) { 90 for (n = 1; n <= BLOCK_SIZE; n++) { 91 in_buffer(ptr+n) = lhsc(m,n,i,jsize,k,cprev); 92 } 93 ptr = ptr+BLOCK_SIZE; 94 } 95 for (n = 1; n <= BLOCK_SIZE; n++) { 96 in_buffer(ptr+n) = rhs(n,i,jsize,k,cprev); 97 } 98 ptr = ptr+BLOCK_SIZE; 99 } 100 } 101 102//--------------------------------------------------------------------- 103// send and receive buffer 104//--------------------------------------------------------------------- 105 106 for (phase = 0; phase < 3; phase++) { 107 108 if (send_color[NORTHDIR]==phase) 109 RCCE_send((char*)in_buffer, buffer_size*sizeof(double), successor(2)); 110 if (recv_color[NORTHDIR]==phase) 111 RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), predecessor(2)); 112 } 113 114//--------------------------------------------------------------------- 115// unpack buffer 116//--------------------------------------------------------------------- 117 jstart = 0; 118 ptr = 0; 119 for (k = 0; k <= KMAX-1; k++) { 120 for (i = 0; i <= IMAX-1; i++) { 121 for (m = 1; m <= BLOCK_SIZE; m++) { 122 for (n = 1; n <= BLOCK_SIZE; n++) { 123 lhsc(m,n,i,jstart-1,k,c) = out_buffer(ptr+n); 124 } 125 ptr = ptr+BLOCK_SIZE; 126 } 127 for (n = 1; n <= BLOCK_SIZE; n++) { 128 rhs(n,i,jstart-1,k,c) = out_buffer(ptr+n); 129 } 130 ptr = ptr+BLOCK_SIZE; 131 } 132 } 133 134 return; 135} 136 137//--------------------------------------------------------------------- 138//--------------------------------------------------------------------- 139 140void y_sendrecv_back(int c, int cprev) { 141 142//--------------------------------------------------------------------- 143// pack up and send U(jstart) for all i and k 144//--------------------------------------------------------------------- 145 146 int i,k,n,ptr,jstart; 147 int phase; 148 int error,buffer_size; 149 150//--------------------------------------------------------------------- 151// Send element 0 to previous processor 152//--------------------------------------------------------------------- 153 jstart = 0; 154 buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE; 155 ptr = 0; 156 for (k = 0; k <= KMAX-1; k++) { 157 for (i = 0; i <= IMAX-1; i++) { 158 for (n = 1; n <= BLOCK_SIZE; n++) { 159 in_buffer(ptr+n) = rhs(n,i,jstart,k,cprev); 160 } 161 ptr = ptr+BLOCK_SIZE; 162 } 163 } 164 165//--------------------------------------------------------------------- 166// send and receive buffer 167//--------------------------------------------------------------------- 168 169 for (phase = 0; phase < 3; phase++) { 170 171 if (send_color[SOUTHDIR]==phase) 172 RCCE_send((char*)in_buffer, buffer_size*sizeof(double), predecessor(2)); 173 if (recv_color[SOUTHDIR]==phase) 174 RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), successor(2)); 175 } 176 177//--------------------------------------------------------------------- 178// unpack U(jsize) for all i and k 179//--------------------------------------------------------------------- 180 181 ptr = 0; 182 for (k = 0; k <= KMAX-1; k++) { 183 for (i = 0; i <= IMAX-1; i++) { 184 for (n = 1; n <= BLOCK_SIZE; n++) { 185 backsub_info(n,i,k,c) = out_buffer(ptr+n); 186 } 187 ptr = ptr+BLOCK_SIZE; 188 } 189 } 190 191 return; 192} 193 194void y_backsubstitute(int first, int last, int c) { 195 196//--------------------------------------------------------------------- 197//--------------------------------------------------------------------- 198 199//--------------------------------------------------------------------- 200// back solve: if last cell, then generate U(jsize)=rhs(jsize) 201// else assume U(jsize) is loaded in un pack backsub_info 202// so just use it 203// after call u(jstart) will be sent to next cell 204//--------------------------------------------------------------------- 205 206 int i, k; 207 int m,n,j,jsize,isize,ksize,jstart; 208 209 jstart = 0; 210 isize = cell_size(1,c)-end(1,c)-1 ; 211 jsize = cell_size(2,c)-1; 212 ksize = cell_size(3,c)-end(3,c)-1; 213 if (last == 0) { 214 for (k = start(3,c); k <= ksize; k++) { 215 for (i = start(1,c); i <= isize; i++) { 216//--------------------------------------------------------------------- 217// U(jsize) uses info from previous cell if not last cell 218//--------------------------------------------------------------------- 219 for (m = 1; m <= BLOCK_SIZE; m++) { 220 for (n = 1; n <= BLOCK_SIZE; n++) { 221 rhs(m,i,jsize,k,c) = rhs(m,i,jsize,k,c) 222 - lhsc(m,n,i,jsize,k,c)* 223 backsub_info(n,i,k,c); 224 } 225 } 226 } 227 } 228 } 229 for (k = start(3,c); k <= ksize; k++) { 230 for (j = jsize-1; j >= jstart; j--) { 231 for (i = start(1,c); i <= isize; i++) { 232 for (m = 1; m <= BLOCK_SIZE; m++) { 233 for (n = 1; n <= BLOCK_SIZE; n++) { 234 rhs(m,i,j,k,c) = rhs(m,i,j,k,c) 235 - lhsc(m,n,i,j,k,c)*rhs(n,i,j+1,k,c); 236 } 237 } 238 } 239 } 240 } 241 242 return; 243} 244 245//--------------------------------------------------------------------- 246//--------------------------------------------------------------------- 247 248void y_solve_cell(int first,int last,int c) { 249 250//--------------------------------------------------------------------- 251//--------------------------------------------------------------------- 252 253//--------------------------------------------------------------------- 254// performs guaussian elimination on this cell. 255// 256// assumes that unpacking routines for non-first cells 257// preload C' and rhs' from previous cell. 258// 259// assumed send happens outside this routine, but that 260// c'(JMAX) and rhs'(JMAX) will be sent to next cell 261//--------------------------------------------------------------------- 262 263 int i,j,k,isize,ksize,jsize,jstart; 264 double utmp[6*(JMAX+4)]; 265#define utmp(m,i) utmp[(m-1)+6*(i+2)] 266 267 jstart = 0; 268 isize = cell_size(1,c)-end(1,c)-1; 269 jsize = cell_size(2,c)-1; 270 ksize = cell_size(3,c)-end(3,c)-1; 271 272 lhsabinit(lhsa, lhsb, jsize); 273 274 for (k = start(3,c); k <= ksize; k++) { 275 for (i = start(1,c); i <= isize; i++) { 276 277//--------------------------------------------------------------------- 278// This function computes the left hand side for the three y-factors 279//--------------------------------------------------------------------- 280 281//--------------------------------------------------------------------- 282// Compute the indices for storing the tri-diagonal matrix; 283// determine a (labeled f) and n jacobians for cell c 284//--------------------------------------------------------------------- 285 for (j = start(2,c)-1; j <= cell_size(2,c)-end(2,c); j++) { 286 utmp(1,j) = 1.0e0 / u(1,i,j,k,c); 287 utmp(2,j) = u(2,i,j,k,c); 288 utmp(3,j) = u(3,i,j,k,c); 289 utmp(4,j) = u(4,i,j,k,c); 290 utmp(5,j) = u(5,i,j,k,c); 291 utmp(6,j) = qs(i,j,k,c); 292 } 293 294 for (j = start(2,c)-1; j <= cell_size(2,c)-end(2,c); j++) { 295 296 tmp1 = utmp(1,j); 297 tmp2 = tmp1 * tmp1; 298 tmp3 = tmp1 * tmp2; 299 300 fjac(1,1,j) = 0.0e+00; 301 fjac(1,2,j) = 0.0e+00; 302 fjac(1,3,j) = 1.0e+00; 303 fjac(1,4,j) = 0.0e+00; 304 fjac(1,5,j) = 0.0e+00; 305 306 fjac(2,1,j) = - ( utmp(2,j)*utmp(3,j) ) 307 * tmp2; 308 fjac(2,2,j) = utmp(3,j) * tmp1; 309 fjac(2,3,j) = utmp(2,j) * tmp1; 310 fjac(2,4,j) = 0.0e+00; 311 fjac(2,5,j) = 0.0e+00; 312 313 fjac(3,1,j) = - ( utmp(3,j)*utmp(3,j)*tmp2) 314 + c2 * utmp(6,j); 315 fjac(3,2,j) = - c2 * utmp(2,j) * tmp1; 316 fjac(3,3,j) = ( 2.0e+00 - c2 ) 317 * utmp(3,j) * tmp1 ; 318 fjac(3,4,j) = - c2 * utmp(4,j) * tmp1 ; 319 fjac(3,5,j) = c2; 320 321 fjac(4,1,j) = - ( utmp(3,j)*utmp(4,j) ) 322 * tmp2; 323 fjac(4,2,j) = 0.0e+00; 324 fjac(4,3,j) = utmp(4,j) * tmp1; 325 fjac(4,4,j) = utmp(3,j) * tmp1; 326 fjac(4,5,j) = 0.0e+00; 327 328 fjac(5,1,j) = ( c2 * 2.0e0 * utmp(6,j) 329 - c1 * utmp(5,j) * tmp1 ) 330 * utmp(3,j) * tmp1 ; 331 fjac(5,2,j) = - c2 * utmp(2,j)*utmp(3,j) 332 * tmp2; 333 fjac(5,3,j) = c1 * utmp(5,j) * tmp1 334 - c2 * ( utmp(6,j) 335 + utmp(3,j)*utmp(3,j) * tmp2 ); 336 fjac(5,4,j) = - c2 * ( utmp(3,j)*utmp(4,j) ) 337 * tmp2; 338 fjac(5,5,j) = c1 * utmp(3,j) * tmp1 ; 339 340 njac(1,1,j) = 0.0e+00; 341 njac(1,2,j) = 0.0e+00; 342 njac(1,3,j) = 0.0e+00; 343 njac(1,4,j) = 0.0e+00; 344 njac(1,5,j) = 0.0e+00; 345 346 njac(2,1,j) = - c3c4 * tmp2 * utmp(2,j); 347 njac(2,2,j) = c3c4 * tmp1; 348 njac(2,3,j) = 0.0e+00; 349 njac(2,4,j) = 0.0e+00; 350 njac(2,5,j) = 0.0e+00; 351 352 njac(3,1,j) = - con43 * c3c4 * tmp2 * utmp(3,j); 353 njac(3,2,j) = 0.0e+00; 354 njac(3,3,j) = con43 * c3c4 * tmp1; 355 njac(3,4,j) = 0.0e+00; 356 njac(3,5,j) = 0.0e+00; 357 358 njac(4,1,j) = - c3c4 * tmp2 * utmp(4,j); 359 njac(4,2,j) = 0.0e+00; 360 njac(4,3,j) = 0.0e+00; 361 njac(4,4,j) = c3c4 * tmp1; 362 njac(4,5,j) = 0.0e+00; 363 364 njac(5,1,j) = - ( c3c4 365 - c1345 ) * tmp3 * SQR(utmp(2,j)) 366 - ( con43 * c3c4 367 - c1345 ) * tmp3 * SQR(utmp(3,j)) 368 - ( c3c4 - c1345 ) * tmp3 * SQR(utmp(4,j)) 369 - c1345 * tmp2 * utmp(5,j); 370 371 njac(5,2,j) = ( c3c4 - c1345 ) * tmp2 * utmp(2,j); 372 njac(5,3,j) = ( con43 * c3c4 373 - c1345 ) * tmp2 * utmp(3,j); 374 njac(5,4,j) = ( c3c4 - c1345 ) * tmp2 * utmp(4,j); 375 njac(5,5,j) = ( c1345 ) * tmp1; 376 377 } 378 379//--------------------------------------------------------------------- 380// now joacobians set, so form left hand side in y direction 381//--------------------------------------------------------------------- 382 for (j = start(2,c); j <= jsize-end(2,c); j++) { 383 384 tmp1 = dt * ty1; 385 tmp2 = dt * ty2; 386 387 lhsa(1,1,j) = - tmp2 * fjac(1,1,j-1) 388 - tmp1 * njac(1,1,j-1) 389 - tmp1 * dy1 ; 390 lhsa(1,2,j) = - tmp2 * fjac(1,2,j-1) 391 - tmp1 * njac(1,2,j-1); 392 lhsa(1,3,j) = - tmp2 * fjac(1,3,j-1) 393 - tmp1 * njac(1,3,j-1); 394 lhsa(1,4,j) = - tmp2 * fjac(1,4,j-1) 395 - tmp1 * njac(1,4,j-1); 396 lhsa(1,5,j) = - tmp2 * fjac(1,5,j-1) 397 - tmp1 * njac(1,5,j-1); 398 399 lhsa(2,1,j) = - tmp2 * fjac(2,1,j-1) 400 - tmp1 * njac(2,1,j-1); 401 lhsa(2,2,j) = - tmp2 * fjac(2,2,j-1) 402 - tmp1 * njac(2,2,j-1) 403 - tmp1 * dy2; 404 lhsa(2,3,j) = - tmp2 * fjac(2,3,j-1) 405 - tmp1 * njac(2,3,j-1); 406 lhsa(2,4,j) = - tmp2 * fjac(2,4,j-1) 407 - tmp1 * njac(2,4,j-1); 408 lhsa(2,5,j) = - tmp2 * fjac(2,5,j-1) 409 - tmp1 * njac(2,5,j-1); 410 411 lhsa(3,1,j) = - tmp2 * fjac(3,1,j-1) 412 - tmp1 * njac(3,1,j-1); 413 lhsa(3,2,j) = - tmp2 * fjac(3,2,j-1) 414 - tmp1 * njac(3,2,j-1); 415 lhsa(3,3,j) = - tmp2 * fjac(3,3,j-1) 416 - tmp1 * njac(3,3,j-1) 417 - tmp1 * dy3 ; 418 lhsa(3,4,j) = - tmp2 * fjac(3,4,j-1) 419 - tmp1 * njac(3,4,j-1); 420 lhsa(3,5,j) = - tmp2 * fjac(3,5,j-1) 421 - tmp1 * njac(3,5,j-1); 422 423 lhsa(4,1,j) = - tmp2 * fjac(4,1,j-1) 424 - tmp1 * njac(4,1,j-1); 425 lhsa(4,2,j) = - tmp2 * fjac(4,2,j-1) 426 - tmp1 * njac(4,2,j-1); 427 lhsa(4,3,j) = - tmp2 * fjac(4,3,j-1) 428 - tmp1 * njac(4,3,j-1); 429 lhsa(4,4,j) = - tmp2 * fjac(4,4,j-1) 430 - tmp1 * njac(4,4,j-1) 431 - tmp1 * dy4; 432 lhsa(4,5,j) = - tmp2 * fjac(4,5,j-1) 433 - tmp1 * njac(4,5,j-1); 434 435 lhsa(5,1,j) = - tmp2 * fjac(5,1,j-1) 436 - tmp1 * njac(5,1,j-1); 437 lhsa(5,2,j) = - tmp2 * fjac(5,2,j-1) 438 - tmp1 * njac(5,2,j-1); 439 lhsa(5,3,j) = - tmp2 * fjac(5,3,j-1) 440 - tmp1 * njac(5,3,j-1); 441 lhsa(5,4,j) = - tmp2 * fjac(5,4,j-1) 442 - tmp1 * njac(5,4,j-1); 443 lhsa(5,5,j) = - tmp2 * fjac(5,5,j-1) 444 - tmp1 * njac(5,5,j-1) 445 - tmp1 * dy5; 446 447 lhsb(1,1,j) = 1.0e+00 448 + tmp1 * 2.0e+00 * njac(1,1,j) 449 + tmp1 * 2.0e+00 * dy1; 450 lhsb(1,2,j) = tmp1 * 2.0e+00 * njac(1,2,j); 451 lhsb(1,3,j) = tmp1 * 2.0e+00 * njac(1,3,j); 452 lhsb(1,4,j) = tmp1 * 2.0e+00 * njac(1,4,j); 453 lhsb(1,5,j) = tmp1 * 2.0e+00 * njac(1,5,j); 454 455 lhsb(2,1,j) = tmp1 * 2.0e+00 * njac(2,1,j); 456 lhsb(2,2,j) = 1.0e+00 457 + tmp1 * 2.0e+00 * njac(2,2,j) 458 + tmp1 * 2.0e+00 * dy2; 459 lhsb(2,3,j) = tmp1 * 2.0e+00 * njac(2,3,j); 460 lhsb(2,4,j) = tmp1 * 2.0e+00 * njac(2,4,j); 461 lhsb(2,5,j) = tmp1 * 2.0e+00 * njac(2,5,j); 462 463 lhsb(3,1,j) = tmp1 * 2.0e+00 * njac(3,1,j); 464 lhsb(3,2,j) = tmp1 * 2.0e+00 * njac(3,2,j); 465 lhsb(3,3,j) = 1.0e+00 466 + tmp1 * 2.0e+00 * njac(3,3,j) 467 + tmp1 * 2.0e+00 * dy3; 468 lhsb(3,4,j) = tmp1 * 2.0e+00 * njac(3,4,j); 469 lhsb(3,5,j) = tmp1 * 2.0e+00 * njac(3,5,j); 470 471 lhsb(4,1,j) = tmp1 * 2.0e+00 * njac(4,1,j); 472 lhsb(4,2,j) = tmp1 * 2.0e+00 * njac(4,2,j); 473 lhsb(4,3,j) = tmp1 * 2.0e+00 * njac(4,3,j); 474 lhsb(4,4,j) = 1.0e+00 475 + tmp1 * 2.0e+00 * njac(4,4,j) 476 + tmp1 * 2.0e+00 * dy4; 477 lhsb(4,5,j) = tmp1 * 2.0e+00 * njac(4,5,j); 478 479 lhsb(5,1,j) = tmp1 * 2.0e+00 * njac(5,1,j); 480 lhsb(5,2,j) = tmp1 * 2.0e+00 * njac(5,2,j); 481 lhsb(5,3,j) = tmp1 * 2.0e+00 * njac(5,3,j); 482 lhsb(5,4,j) = tmp1 * 2.0e+00 * njac(5,4,j); 483 lhsb(5,5,j) = 1.0e+00 484 + tmp1 * 2.0e+00 * njac(5,5,j) 485 + tmp1 * 2.0e+00 * dy5; 486 487 lhsc(1,1,i,j,k,c) = tmp2 * fjac(1,1,j+1) 488 - tmp1 * njac(1,1,j+1) 489 - tmp1 * dy1; 490 lhsc(1,2,i,j,k,c) = tmp2 * fjac(1,2,j+1) 491 - tmp1 * njac(1,2,j+1); 492 lhsc(1,3,i,j,k,c) = tmp2 * fjac(1,3,j+1) 493 - tmp1 * njac(1,3,j+1); 494 lhsc(1,4,i,j,k,c) = tmp2 * fjac(1,4,j+1) 495 - tmp1 * njac(1,4,j+1); 496 lhsc(1,5,i,j,k,c) = tmp2 * fjac(1,5,j+1) 497 - tmp1 * njac(1,5,j+1); 498 499 lhsc(2,1,i,j,k,c) = tmp2 * fjac(2,1,j+1) 500 - tmp1 * njac(2,1,j+1); 501 lhsc(2,2,i,j,k,c) = tmp2 * fjac(2,2,j+1) 502 - tmp1 * njac(2,2,j+1) 503 - tmp1 * dy2; 504 lhsc(2,3,i,j,k,c) = tmp2 * fjac(2,3,j+1) 505 - tmp1 * njac(2,3,j+1); 506 lhsc(2,4,i,j,k,c) = tmp2 * fjac(2,4,j+1) 507 - tmp1 * njac(2,4,j+1); 508 lhsc(2,5,i,j,k,c) = tmp2 * fjac(2,5,j+1) 509 - tmp1 * njac(2,5,j+1); 510 511 lhsc(3,1,i,j,k,c) = tmp2 * fjac(3,1,j+1) 512 - tmp1 * njac(3,1,j+1); 513 lhsc(3,2,i,j,k,c) = tmp2 * fjac(3,2,j+1) 514 - tmp1 * njac(3,2,j+1); 515 lhsc(3,3,i,j,k,c) = tmp2 * fjac(3,3,j+1) 516 - tmp1 * njac(3,3,j+1) 517 - tmp1 * dy3; 518 lhsc(3,4,i,j,k,c) = tmp2 * fjac(3,4,j+1) 519 - tmp1 * njac(3,4,j+1); 520 lhsc(3,5,i,j,k,c) = tmp2 * fjac(3,5,j+1) 521 - tmp1 * njac(3,5,j+1); 522 523 lhsc(4,1,i,j,k,c) = tmp2 * fjac(4,1,j+1) 524 - tmp1 * njac(4,1,j+1); 525 lhsc(4,2,i,j,k,c) = tmp2 * fjac(4,2,j+1) 526 - tmp1 * njac(4,2,j+1); 527 lhsc(4,3,i,j,k,c) = tmp2 * fjac(4,3,j+1) 528 - tmp1 * njac(4,3,j+1); 529 lhsc(4,4,i,j,k,c) = tmp2 * fjac(4,4,j+1) 530 - tmp1 * njac(4,4,j+1) 531 - tmp1 * dy4; 532 lhsc(4,5,i,j,k,c) = tmp2 * fjac(4,5,j+1) 533 - tmp1 * njac(4,5,j+1); 534 535 lhsc(5,1,i,j,k,c) = tmp2 * fjac(5,1,j+1) 536 - tmp1 * njac(5,1,j+1); 537 lhsc(5,2,i,j,k,c) = tmp2 * fjac(5,2,j+1) 538 - tmp1 * njac(5,2,j+1); 539 lhsc(5,3,i,j,k,c) = tmp2 * fjac(5,3,j+1) 540 - tmp1 * njac(5,3,j+1); 541 lhsc(5,4,i,j,k,c) = tmp2 * fjac(5,4,j+1) 542 - tmp1 * njac(5,4,j+1); 543 lhsc(5,5,i,j,k,c) = tmp2 * fjac(5,5,j+1) 544 - tmp1 * njac(5,5,j+1) 545 - tmp1 * dy5; 546 547 } 548 549 550//--------------------------------------------------------------------- 551// outer most do loops - sweeping in i direction 552//--------------------------------------------------------------------- 553 if (first == 1) { 554 555//--------------------------------------------------------------------- 556// multiply c(i,jstart,k) by b_inverse and copy back to c 557// multiply rhs(jstart) by b_inverse(jstart) and copy to rhs 558//--------------------------------------------------------------------- 559 binvcrhs( &lhsb(1,1,jstart), 560 &lhsc(1,1,i,jstart,k,c), 561 &rhs(1,i,jstart,k,c) ); 562 563 } 564 565//--------------------------------------------------------------------- 566// begin inner most do loop 567// do all the elements of the cell unless last 568//--------------------------------------------------------------------- 569 for (j = jstart+first; j <= jsize-last; j++) { 570 571//--------------------------------------------------------------------- 572// subtract A*lhs_vector(j-1) from lhs_vector(j) 573// 574// rhs(j) = rhs(j) - A*rhs(j-1) 575//--------------------------------------------------------------------- 576 matvec_sub(&lhsa(1,1,j), 577 &rhs(1,i,j-1,k,c),&rhs(1,i,j,k,c)); 578 579//--------------------------------------------------------------------- 580// B(j) = B(j) - C(j-1)*A(j) 581//--------------------------------------------------------------------- 582 matmul_sub(&lhsa(1,1,j), 583 &lhsc(1,1,i,j-1,k,c), 584 &lhsb(1,1,j)); 585 586//--------------------------------------------------------------------- 587// multiply c(i,j,k) by b_inverse and copy back to c 588// multiply rhs(i,1,k) by b_inverse(i,1,k) and copy to rhs 589//--------------------------------------------------------------------- 590 binvcrhs( &lhsb(1,1,j), 591 &lhsc(1,1,i,j,k,c), 592 &rhs(1,i,j,k,c) ); 593 594 } 595 596//--------------------------------------------------------------------- 597// Now finish up special cases for last cell 598//--------------------------------------------------------------------- 599 if (last == 1) { 600 601//--------------------------------------------------------------------- 602// rhs(jsize) = rhs(jsize) - A*rhs(jsize-1) 603//--------------------------------------------------------------------- 604 matvec_sub(&lhsa(1,1,jsize), 605 &rhs(1,i,jsize-1,k,c),&rhs(1,i,jsize,k,c)); 606 607//--------------------------------------------------------------------- 608// B(jsize) = B(jsize) - C(jsize-1)*A(jsize) 609// call matmul_sub(aa,i,jsize,k,c, 610// $ cc,i,jsize-1,k,c,bb,i,jsize,k,c) 611//--------------------------------------------------------------------- 612 matmul_sub(&lhsa(1,1,jsize), 613 &lhsc(1,1,i,jsize-1,k,c), 614 &lhsb(1,1,jsize)); 615 616//--------------------------------------------------------------------- 617// multiply rhs(jsize) by b_inverse(jsize) and copy to rhs 618//--------------------------------------------------------------------- 619 binvrhs( &lhsb(1,1,jsize), 620 &rhs(1,i,jsize,k,c) ); 621 622 } 623 } 624 } 625 626 627 return; 628} 629 630 631 632