1//---------------------------------------------------------------------
2//---------------------------------------------------------------------
3#include "header.h"
4#include "mpinpb.h"
5#include "work_lhs.h"
6
7extern void y_sendrecv_solve(int c, int cprev);
8extern void y_sendrecv_back(int c, int cprev);
9extern void y_backsubstitute(int first, int last, int c);
10extern void y_solve_cell(int first, int last, int c);
11
12void y_solve() {
13
14//---------------------------------------------------------------------
15//---------------------------------------------------------------------
16
17//---------------------------------------------------------------------
18//     Performs line solves in Y direction by first factoring
19//     the block-tridiagonal matrix into an upper triangular matrix,
20//     and then performing back substitution to solve for the unknow
21//     vectors of each line.
22//
23//     Make sure we treat elements zero to cell_size in the direction
24//     of the sweep.
25//---------------------------------------------------------------------
26
27      int  c, cprev, stage, first, last, error;
28
29//---------------------------------------------------------------------
30//     in our terminology stage is the number of the cell in the y-direction
31//     i.e. stage = 1 means the start of the line stage=ncells means end
32//---------------------------------------------------------------------
33      for (stage = 1; stage <= ncells; stage++) {
34         c = slice(2,stage);
35//---------------------------------------------------------------------
36//     set last-cell flag
37//---------------------------------------------------------------------
38         first = (stage == 1);
39         last =  (stage == ncells);
40
41        if (stage >1) {
42           cprev = slice(2,stage-1);
43           y_sendrecv_solve(c, cprev);
44        }
45        y_solve_cell(first,last,c);
46      }
47
48//---------------------------------------------------------------------
49//     now perform backsubstitution in reverse direction
50//---------------------------------------------------------------------
51      for (stage = ncells; stage >= 1; stage--) {
52         c = slice(2,stage);
53         first = (stage == 1);
54         last =  (stage == ncells);
55
56         if (stage <ncells) {
57            cprev = slice(2,stage+1);
58            y_sendrecv_back(c, cprev);
59         }
60
61         y_backsubstitute(first,last,c);
62      }
63
64      return;
65}
66
67
68void y_sendrecv_solve(int c, int cprev) {
69
70//---------------------------------------------------------------------
71//     pack up and send C'(jend) and rhs'(jend) for
72//     all i and k
73//---------------------------------------------------------------------
74
75      int i,k,m,n,jsize,ptr,jstart;
76      int phase;
77      int error,buffer_size;
78
79      jsize = cell_size(2,cprev)-1;
80      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
81           (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE);
82
83//---------------------------------------------------------------------
84//     pack up buffer
85//---------------------------------------------------------------------
86      ptr = 0;
87      for (k = 0; k <= KMAX-1; k++) {
88         for (i = 0; i <= IMAX-1; i++) {
89            for (m = 1; m <= BLOCK_SIZE; m++) {
90               for (n = 1; n <= BLOCK_SIZE; n++) {
91                  in_buffer(ptr+n) = lhsc(m,n,i,jsize,k,cprev);
92               }
93               ptr = ptr+BLOCK_SIZE;
94            }
95            for (n = 1; n <= BLOCK_SIZE; n++) {
96               in_buffer(ptr+n) = rhs(n,i,jsize,k,cprev);
97            }
98            ptr = ptr+BLOCK_SIZE;
99         }
100      }
101
102//---------------------------------------------------------------------
103//     send and receive buffer
104//---------------------------------------------------------------------
105
106      for (phase = 0; phase < 3; phase++) {
107
108        if (send_color[NORTHDIR]==phase)
109          RCCE_send((char*)in_buffer, buffer_size*sizeof(double), successor(2));
110        if (recv_color[NORTHDIR]==phase)
111          RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), predecessor(2));
112      }
113
114//---------------------------------------------------------------------
115//     unpack buffer
116//---------------------------------------------------------------------
117      jstart = 0;
118      ptr = 0;
119      for (k = 0; k <= KMAX-1; k++) {
120         for (i = 0; i <= IMAX-1; i++) {
121            for (m = 1; m <= BLOCK_SIZE; m++) {
122               for (n = 1; n <= BLOCK_SIZE; n++) {
123                  lhsc(m,n,i,jstart-1,k,c) = out_buffer(ptr+n);
124               }
125               ptr = ptr+BLOCK_SIZE;
126            }
127            for (n = 1; n <= BLOCK_SIZE; n++) {
128               rhs(n,i,jstart-1,k,c) = out_buffer(ptr+n);
129            }
130            ptr = ptr+BLOCK_SIZE;
131         }
132      }
133
134      return;
135}
136
137//---------------------------------------------------------------------
138//---------------------------------------------------------------------
139
140void y_sendrecv_back(int c, int cprev) {
141
142//---------------------------------------------------------------------
143//     pack up and send U(jstart) for all i and k
144//---------------------------------------------------------------------
145
146      int i,k,n,ptr,jstart;
147      int phase;
148      int error,buffer_size;
149
150//---------------------------------------------------------------------
151//     Send element 0 to previous processor
152//---------------------------------------------------------------------
153      jstart = 0;
154      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE;
155      ptr = 0;
156      for (k = 0; k <= KMAX-1; k++) {
157         for (i = 0; i <= IMAX-1; i++) {
158            for (n = 1; n <= BLOCK_SIZE; n++) {
159               in_buffer(ptr+n) = rhs(n,i,jstart,k,cprev);
160            }
161            ptr = ptr+BLOCK_SIZE;
162         }
163      }
164
165//---------------------------------------------------------------------
166//     send and receive buffer
167//---------------------------------------------------------------------
168
169      for (phase = 0; phase < 3; phase++) {
170
171        if (send_color[SOUTHDIR]==phase)
172          RCCE_send((char*)in_buffer, buffer_size*sizeof(double), predecessor(2));
173        if (recv_color[SOUTHDIR]==phase)
174          RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), successor(2));
175      }
176
177//---------------------------------------------------------------------
178//     unpack U(jsize) for all i and k
179//---------------------------------------------------------------------
180
181      ptr = 0;
182      for (k = 0; k <= KMAX-1; k++) {
183         for (i = 0; i <= IMAX-1; i++) {
184            for (n = 1; n <= BLOCK_SIZE; n++) {
185               backsub_info(n,i,k,c) = out_buffer(ptr+n);
186            }
187            ptr = ptr+BLOCK_SIZE;
188         }
189      }
190
191      return;
192}
193
194void y_backsubstitute(int first, int last, int c) {
195
196//---------------------------------------------------------------------
197//---------------------------------------------------------------------
198
199//---------------------------------------------------------------------
200//     back solve: if last cell, then generate U(jsize)=rhs(jsize)
201//     else assume U(jsize) is loaded in un pack backsub_info
202//     so just use it
203//     after call u(jstart) will be sent to next cell
204//---------------------------------------------------------------------
205
206      int i, k;
207      int m,n,j,jsize,isize,ksize,jstart;
208
209      jstart = 0;
210      isize = cell_size(1,c)-end(1,c)-1      ;
211      jsize = cell_size(2,c)-1;
212      ksize = cell_size(3,c)-end(3,c)-1;
213      if (last == 0) {
214         for (k = start(3,c); k <= ksize; k++) {
215            for (i = start(1,c); i <= isize; i++) {
216//---------------------------------------------------------------------
217//     U(jsize) uses info from previous cell if not last cell
218//---------------------------------------------------------------------
219               for (m = 1; m <= BLOCK_SIZE; m++) {
220                  for (n = 1; n <= BLOCK_SIZE; n++) {
221                     rhs(m,i,jsize,k,c) = rhs(m,i,jsize,k,c)
222                          - lhsc(m,n,i,jsize,k,c)*
223                          backsub_info(n,i,k,c);
224                  }
225               }
226            }
227         }
228      }
229      for (k = start(3,c); k <= ksize; k++) {
230         for (j = jsize-1; j >= jstart; j--) {
231            for (i = start(1,c); i <= isize; i++) {
232               for (m = 1; m <= BLOCK_SIZE; m++) {
233                  for (n = 1; n <= BLOCK_SIZE; n++) {
234                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c)
235                          - lhsc(m,n,i,j,k,c)*rhs(n,i,j+1,k,c);
236                  }
237               }
238            }
239         }
240      }
241
242      return;
243}
244
245//---------------------------------------------------------------------
246//---------------------------------------------------------------------
247
248void y_solve_cell(int first,int last,int c) {
249
250//---------------------------------------------------------------------
251//---------------------------------------------------------------------
252
253//---------------------------------------------------------------------
254//     performs guaussian elimination on this cell.
255//
256//     assumes that unpacking routines for non-first cells
257//     preload C' and rhs' from previous cell.
258//
259//     assumed send happens outside this routine, but that
260//     c'(JMAX) and rhs'(JMAX) will be sent to next cell
261//---------------------------------------------------------------------
262
263      int i,j,k,isize,ksize,jsize,jstart;
264      double utmp[6*(JMAX+4)];
265#define utmp(m,i) utmp[(m-1)+6*(i+2)]
266
267      jstart = 0;
268      isize = cell_size(1,c)-end(1,c)-1;
269      jsize = cell_size(2,c)-1;
270      ksize = cell_size(3,c)-end(3,c)-1;
271
272      lhsabinit(lhsa, lhsb, jsize);
273
274      for (k = start(3,c); k <= ksize; k++) {
275         for (i = start(1,c); i <= isize; i++) {
276
277//---------------------------------------------------------------------
278//     This function computes the left hand side for the three y-factors
279//---------------------------------------------------------------------
280
281//---------------------------------------------------------------------
282//     Compute the indices for storing the tri-diagonal matrix;
283//     determine a (labeled f) and n jacobians for cell c
284//---------------------------------------------------------------------
285            for (j = start(2,c)-1; j <= cell_size(2,c)-end(2,c); j++) {
286               utmp(1,j) = 1.0e0 / u(1,i,j,k,c);
287               utmp(2,j) = u(2,i,j,k,c);
288               utmp(3,j) = u(3,i,j,k,c);
289               utmp(4,j) = u(4,i,j,k,c);
290               utmp(5,j) = u(5,i,j,k,c);
291               utmp(6,j) = qs(i,j,k,c);
292            }
293
294            for (j = start(2,c)-1; j <= cell_size(2,c)-end(2,c); j++) {
295
296               tmp1 = utmp(1,j);
297               tmp2 = tmp1 * tmp1;
298               tmp3 = tmp1 * tmp2;
299
300               fjac(1,1,j) = 0.0e+00;
301               fjac(1,2,j) = 0.0e+00;
302               fjac(1,3,j) = 1.0e+00;
303               fjac(1,4,j) = 0.0e+00;
304               fjac(1,5,j) = 0.0e+00;
305
306               fjac(2,1,j) = - ( utmp(2,j)*utmp(3,j) )
307                    * tmp2;
308               fjac(2,2,j) = utmp(3,j) * tmp1;
309               fjac(2,3,j) = utmp(2,j) * tmp1;
310               fjac(2,4,j) = 0.0e+00;
311               fjac(2,5,j) = 0.0e+00;
312
313               fjac(3,1,j) = - ( utmp(3,j)*utmp(3,j)*tmp2)
314                    + c2 * utmp(6,j);
315               fjac(3,2,j) = - c2 *  utmp(2,j) * tmp1;
316               fjac(3,3,j) = ( 2.0e+00 - c2 )
317                    *  utmp(3,j) * tmp1 ;
318               fjac(3,4,j) = - c2 * utmp(4,j) * tmp1 ;
319               fjac(3,5,j) = c2;
320
321               fjac(4,1,j) = - ( utmp(3,j)*utmp(4,j) )
322                    * tmp2;
323               fjac(4,2,j) = 0.0e+00;
324               fjac(4,3,j) = utmp(4,j) * tmp1;
325               fjac(4,4,j) = utmp(3,j) * tmp1;
326               fjac(4,5,j) = 0.0e+00;
327
328               fjac(5,1,j) = ( c2 * 2.0e0 * utmp(6,j)
329                    - c1 * utmp(5,j) * tmp1 )
330                    * utmp(3,j) * tmp1 ;
331               fjac(5,2,j) = - c2 * utmp(2,j)*utmp(3,j)
332                    * tmp2;
333               fjac(5,3,j) = c1 * utmp(5,j) * tmp1
334                    - c2 * ( utmp(6,j)
335                    + utmp(3,j)*utmp(3,j) * tmp2 );
336               fjac(5,4,j) = - c2 * ( utmp(3,j)*utmp(4,j) )
337                    * tmp2;
338               fjac(5,5,j) = c1 * utmp(3,j) * tmp1 ;
339
340               njac(1,1,j) = 0.0e+00;
341               njac(1,2,j) = 0.0e+00;
342               njac(1,3,j) = 0.0e+00;
343               njac(1,4,j) = 0.0e+00;
344               njac(1,5,j) = 0.0e+00;
345
346               njac(2,1,j) = - c3c4 * tmp2 * utmp(2,j);
347               njac(2,2,j) =   c3c4 * tmp1;
348               njac(2,3,j) =   0.0e+00;
349               njac(2,4,j) =   0.0e+00;
350               njac(2,5,j) =   0.0e+00;
351
352               njac(3,1,j) = - con43 * c3c4 * tmp2 * utmp(3,j);
353               njac(3,2,j) =   0.0e+00;
354               njac(3,3,j) =   con43 * c3c4 * tmp1;
355               njac(3,4,j) =   0.0e+00;
356               njac(3,5,j) =   0.0e+00;
357
358               njac(4,1,j) = - c3c4 * tmp2 * utmp(4,j);
359               njac(4,2,j) =   0.0e+00;
360               njac(4,3,j) =   0.0e+00;
361               njac(4,4,j) =   c3c4 * tmp1;
362               njac(4,5,j) =   0.0e+00;
363
364               njac(5,1,j) = - (  c3c4
365                    - c1345 ) * tmp3 * SQR(utmp(2,j))
366                    - ( con43 * c3c4
367                    - c1345 ) * tmp3 * SQR(utmp(3,j))
368                    - ( c3c4 - c1345 ) * tmp3 * SQR(utmp(4,j))
369                    - c1345 * tmp2 * utmp(5,j);
370
371               njac(5,2,j) = (  c3c4 - c1345 ) * tmp2 * utmp(2,j);
372               njac(5,3,j) = ( con43 * c3c4
373                    - c1345 ) * tmp2 * utmp(3,j);
374               njac(5,4,j) = ( c3c4 - c1345 ) * tmp2 * utmp(4,j);
375               njac(5,5,j) = ( c1345 ) * tmp1;
376
377            }
378
379//---------------------------------------------------------------------
380//     now joacobians set, so form left hand side in y direction
381//---------------------------------------------------------------------
382            for (j = start(2,c); j <= jsize-end(2,c); j++) {
383
384               tmp1 = dt * ty1;
385               tmp2 = dt * ty2;
386
387               lhsa(1,1,j) = - tmp2 * fjac(1,1,j-1)
388                    - tmp1 * njac(1,1,j-1)
389                    - tmp1 * dy1 ;
390               lhsa(1,2,j) = - tmp2 * fjac(1,2,j-1)
391                    - tmp1 * njac(1,2,j-1);
392               lhsa(1,3,j) = - tmp2 * fjac(1,3,j-1)
393                    - tmp1 * njac(1,3,j-1);
394               lhsa(1,4,j) = - tmp2 * fjac(1,4,j-1)
395                    - tmp1 * njac(1,4,j-1);
396               lhsa(1,5,j) = - tmp2 * fjac(1,5,j-1)
397                    - tmp1 * njac(1,5,j-1);
398
399               lhsa(2,1,j) = - tmp2 * fjac(2,1,j-1)
400                    - tmp1 * njac(2,1,j-1);
401               lhsa(2,2,j) = - tmp2 * fjac(2,2,j-1)
402                    - tmp1 * njac(2,2,j-1)
403                    - tmp1 * dy2;
404               lhsa(2,3,j) = - tmp2 * fjac(2,3,j-1)
405                    - tmp1 * njac(2,3,j-1);
406               lhsa(2,4,j) = - tmp2 * fjac(2,4,j-1)
407                    - tmp1 * njac(2,4,j-1);
408               lhsa(2,5,j) = - tmp2 * fjac(2,5,j-1)
409                    - tmp1 * njac(2,5,j-1);
410
411               lhsa(3,1,j) = - tmp2 * fjac(3,1,j-1)
412                    - tmp1 * njac(3,1,j-1);
413               lhsa(3,2,j) = - tmp2 * fjac(3,2,j-1)
414                    - tmp1 * njac(3,2,j-1);
415               lhsa(3,3,j) = - tmp2 * fjac(3,3,j-1)
416                    - tmp1 * njac(3,3,j-1)
417                    - tmp1 * dy3 ;
418               lhsa(3,4,j) = - tmp2 * fjac(3,4,j-1)
419                    - tmp1 * njac(3,4,j-1);
420               lhsa(3,5,j) = - tmp2 * fjac(3,5,j-1)
421                    - tmp1 * njac(3,5,j-1);
422
423               lhsa(4,1,j) = - tmp2 * fjac(4,1,j-1)
424                    - tmp1 * njac(4,1,j-1);
425               lhsa(4,2,j) = - tmp2 * fjac(4,2,j-1)
426                    - tmp1 * njac(4,2,j-1);
427               lhsa(4,3,j) = - tmp2 * fjac(4,3,j-1)
428                    - tmp1 * njac(4,3,j-1);
429               lhsa(4,4,j) = - tmp2 * fjac(4,4,j-1)
430                    - tmp1 * njac(4,4,j-1)
431                    - tmp1 * dy4;
432               lhsa(4,5,j) = - tmp2 * fjac(4,5,j-1)
433                    - tmp1 * njac(4,5,j-1);
434
435               lhsa(5,1,j) = - tmp2 * fjac(5,1,j-1)
436                    - tmp1 * njac(5,1,j-1);
437               lhsa(5,2,j) = - tmp2 * fjac(5,2,j-1)
438                    - tmp1 * njac(5,2,j-1);
439               lhsa(5,3,j) = - tmp2 * fjac(5,3,j-1)
440                    - tmp1 * njac(5,3,j-1);
441               lhsa(5,4,j) = - tmp2 * fjac(5,4,j-1)
442                    - tmp1 * njac(5,4,j-1);
443               lhsa(5,5,j) = - tmp2 * fjac(5,5,j-1)
444                    - tmp1 * njac(5,5,j-1)
445                    - tmp1 * dy5;
446
447               lhsb(1,1,j) = 1.0e+00
448                    + tmp1 * 2.0e+00 * njac(1,1,j)
449                    + tmp1 * 2.0e+00 * dy1;
450               lhsb(1,2,j) = tmp1 * 2.0e+00 * njac(1,2,j);
451               lhsb(1,3,j) = tmp1 * 2.0e+00 * njac(1,3,j);
452               lhsb(1,4,j) = tmp1 * 2.0e+00 * njac(1,4,j);
453               lhsb(1,5,j) = tmp1 * 2.0e+00 * njac(1,5,j);
454
455               lhsb(2,1,j) = tmp1 * 2.0e+00 * njac(2,1,j);
456               lhsb(2,2,j) = 1.0e+00
457                    + tmp1 * 2.0e+00 * njac(2,2,j)
458                    + tmp1 * 2.0e+00 * dy2;
459               lhsb(2,3,j) = tmp1 * 2.0e+00 * njac(2,3,j);
460               lhsb(2,4,j) = tmp1 * 2.0e+00 * njac(2,4,j);
461               lhsb(2,5,j) = tmp1 * 2.0e+00 * njac(2,5,j);
462
463               lhsb(3,1,j) = tmp1 * 2.0e+00 * njac(3,1,j);
464               lhsb(3,2,j) = tmp1 * 2.0e+00 * njac(3,2,j);
465               lhsb(3,3,j) = 1.0e+00
466                    + tmp1 * 2.0e+00 * njac(3,3,j)
467                    + tmp1 * 2.0e+00 * dy3;
468               lhsb(3,4,j) = tmp1 * 2.0e+00 * njac(3,4,j);
469               lhsb(3,5,j) = tmp1 * 2.0e+00 * njac(3,5,j);
470
471               lhsb(4,1,j) = tmp1 * 2.0e+00 * njac(4,1,j);
472               lhsb(4,2,j) = tmp1 * 2.0e+00 * njac(4,2,j);
473               lhsb(4,3,j) = tmp1 * 2.0e+00 * njac(4,3,j);
474               lhsb(4,4,j) = 1.0e+00
475                    + tmp1 * 2.0e+00 * njac(4,4,j)
476                    + tmp1 * 2.0e+00 * dy4;
477               lhsb(4,5,j) = tmp1 * 2.0e+00 * njac(4,5,j);
478
479               lhsb(5,1,j) = tmp1 * 2.0e+00 * njac(5,1,j);
480               lhsb(5,2,j) = tmp1 * 2.0e+00 * njac(5,2,j);
481               lhsb(5,3,j) = tmp1 * 2.0e+00 * njac(5,3,j);
482               lhsb(5,4,j) = tmp1 * 2.0e+00 * njac(5,4,j);
483               lhsb(5,5,j) = 1.0e+00
484                    + tmp1 * 2.0e+00 * njac(5,5,j)
485                    + tmp1 * 2.0e+00 * dy5;
486
487               lhsc(1,1,i,j,k,c) =  tmp2 * fjac(1,1,j+1)
488                    - tmp1 * njac(1,1,j+1)
489                    - tmp1 * dy1;
490               lhsc(1,2,i,j,k,c) =  tmp2 * fjac(1,2,j+1)
491                    - tmp1 * njac(1,2,j+1);
492               lhsc(1,3,i,j,k,c) =  tmp2 * fjac(1,3,j+1)
493                    - tmp1 * njac(1,3,j+1);
494               lhsc(1,4,i,j,k,c) =  tmp2 * fjac(1,4,j+1)
495                    - tmp1 * njac(1,4,j+1);
496               lhsc(1,5,i,j,k,c) =  tmp2 * fjac(1,5,j+1)
497                    - tmp1 * njac(1,5,j+1);
498
499               lhsc(2,1,i,j,k,c) =  tmp2 * fjac(2,1,j+1)
500                    - tmp1 * njac(2,1,j+1);
501               lhsc(2,2,i,j,k,c) =  tmp2 * fjac(2,2,j+1)
502                    - tmp1 * njac(2,2,j+1)
503                    - tmp1 * dy2;
504               lhsc(2,3,i,j,k,c) =  tmp2 * fjac(2,3,j+1)
505                    - tmp1 * njac(2,3,j+1);
506               lhsc(2,4,i,j,k,c) =  tmp2 * fjac(2,4,j+1)
507                    - tmp1 * njac(2,4,j+1);
508               lhsc(2,5,i,j,k,c) =  tmp2 * fjac(2,5,j+1)
509                    - tmp1 * njac(2,5,j+1);
510
511               lhsc(3,1,i,j,k,c) =  tmp2 * fjac(3,1,j+1)
512                    - tmp1 * njac(3,1,j+1);
513               lhsc(3,2,i,j,k,c) =  tmp2 * fjac(3,2,j+1)
514                    - tmp1 * njac(3,2,j+1);
515               lhsc(3,3,i,j,k,c) =  tmp2 * fjac(3,3,j+1)
516                    - tmp1 * njac(3,3,j+1)
517                    - tmp1 * dy3;
518               lhsc(3,4,i,j,k,c) =  tmp2 * fjac(3,4,j+1)
519                    - tmp1 * njac(3,4,j+1);
520               lhsc(3,5,i,j,k,c) =  tmp2 * fjac(3,5,j+1)
521                    - tmp1 * njac(3,5,j+1);
522
523               lhsc(4,1,i,j,k,c) =  tmp2 * fjac(4,1,j+1)
524                    - tmp1 * njac(4,1,j+1);
525               lhsc(4,2,i,j,k,c) =  tmp2 * fjac(4,2,j+1)
526                    - tmp1 * njac(4,2,j+1);
527               lhsc(4,3,i,j,k,c) =  tmp2 * fjac(4,3,j+1)
528                    - tmp1 * njac(4,3,j+1);
529               lhsc(4,4,i,j,k,c) =  tmp2 * fjac(4,4,j+1)
530                    - tmp1 * njac(4,4,j+1)
531                    - tmp1 * dy4;
532               lhsc(4,5,i,j,k,c) =  tmp2 * fjac(4,5,j+1)
533                    - tmp1 * njac(4,5,j+1);
534
535               lhsc(5,1,i,j,k,c) =  tmp2 * fjac(5,1,j+1)
536                    - tmp1 * njac(5,1,j+1);
537               lhsc(5,2,i,j,k,c) =  tmp2 * fjac(5,2,j+1)
538                    - tmp1 * njac(5,2,j+1);
539               lhsc(5,3,i,j,k,c) =  tmp2 * fjac(5,3,j+1)
540                    - tmp1 * njac(5,3,j+1);
541               lhsc(5,4,i,j,k,c) =  tmp2 * fjac(5,4,j+1)
542                    - tmp1 * njac(5,4,j+1);
543               lhsc(5,5,i,j,k,c) =  tmp2 * fjac(5,5,j+1)
544                    - tmp1 * njac(5,5,j+1)
545                    - tmp1 * dy5;
546
547            }
548
549
550//---------------------------------------------------------------------
551//     outer most do loops - sweeping in i direction
552//---------------------------------------------------------------------
553            if (first == 1) {
554
555//---------------------------------------------------------------------
556//     multiply c(i,jstart,k) by b_inverse and copy back to c
557//     multiply rhs(jstart) by b_inverse(jstart) and copy to rhs
558//---------------------------------------------------------------------
559               binvcrhs( &lhsb(1,1,jstart),
560                              &lhsc(1,1,i,jstart,k,c),
561                              &rhs(1,i,jstart,k,c) );
562
563            }
564
565//---------------------------------------------------------------------
566//     begin inner most do loop
567//     do all the elements of the cell unless last
568//---------------------------------------------------------------------
569            for (j = jstart+first; j <= jsize-last; j++) {
570
571//---------------------------------------------------------------------
572//     subtract A*lhs_vector(j-1) from lhs_vector(j)
573//
574//     rhs(j) = rhs(j) - A*rhs(j-1)
575//---------------------------------------------------------------------
576               matvec_sub(&lhsa(1,1,j),
577                               &rhs(1,i,j-1,k,c),&rhs(1,i,j,k,c));
578
579//---------------------------------------------------------------------
580//     B(j) = B(j) - C(j-1)*A(j)
581//---------------------------------------------------------------------
582               matmul_sub(&lhsa(1,1,j),
583                               &lhsc(1,1,i,j-1,k,c),
584                               &lhsb(1,1,j));
585
586//---------------------------------------------------------------------
587//     multiply c(i,j,k) by b_inverse and copy back to c
588//     multiply rhs(i,1,k) by b_inverse(i,1,k) and copy to rhs
589//---------------------------------------------------------------------
590               binvcrhs( &lhsb(1,1,j),
591                              &lhsc(1,1,i,j,k,c),
592                              &rhs(1,i,j,k,c) );
593
594            }
595
596//---------------------------------------------------------------------
597//     Now finish up special cases for last cell
598//---------------------------------------------------------------------
599            if (last == 1) {
600
601//---------------------------------------------------------------------
602//     rhs(jsize) = rhs(jsize) - A*rhs(jsize-1)
603//---------------------------------------------------------------------
604               matvec_sub(&lhsa(1,1,jsize),
605                               &rhs(1,i,jsize-1,k,c),&rhs(1,i,jsize,k,c));
606
607//---------------------------------------------------------------------
608//     B(jsize) = B(jsize) - C(jsize-1)*A(jsize)
609//     call matmul_sub(aa,i,jsize,k,c,
610//     $              cc,i,jsize-1,k,c,bb,i,jsize,k,c)
611//---------------------------------------------------------------------
612               matmul_sub(&lhsa(1,1,jsize),
613                               &lhsc(1,1,i,jsize-1,k,c),
614                               &lhsb(1,1,jsize));
615
616//---------------------------------------------------------------------
617//     multiply rhs(jsize) by b_inverse(jsize) and copy to rhs
618//---------------------------------------------------------------------
619               binvrhs( &lhsb(1,1,jsize),
620                             &rhs(1,i,jsize,k,c) );
621
622            }
623         }
624      }
625
626
627      return;
628}
629
630
631
632