1//---------------------------------------------------------------------
2//---------------------------------------------------------------------
3#include "header.h"
4#include "mpinpb.h"
5#include "work_lhs.h"
6
7extern void z_sendrecv_solve(int c, int cprev);
8extern void z_sendrecv_back(int c, int cprev);
9extern void z_backsubstitute(int first, int last, int c);
10extern void z_solve_cell(int first, int last, int c);
11
12void z_solve() {
13
14//---------------------------------------------------------------------
15//---------------------------------------------------------------------
16
17//---------------------------------------------------------------------
18//     Performs line solves in Z direction by first factoring
19//     the block-tridiagonal matrix into an upper triangular matrix,
20//     and then performing back substitution to solve for the unknow
21//     vectors of each line.
22//
23//     Make sure we treat elements zero to cell_size in the direction
24//     of the sweep.
25//---------------------------------------------------------------------
26
27      int  c, cprev, stage, first, last, error;
28
29//---------------------------------------------------------------------
30//     in our terminology stage is the number of the cell in the y-direction
31//     i.e. stage = 1 means the start of the line stage=ncells means end
32//---------------------------------------------------------------------
33      for (stage = 1; stage <= ncells; stage++) {
34         c = slice(3,stage);
35//---------------------------------------------------------------------
36//     set last-cell flag
37//---------------------------------------------------------------------
38         first = (stage == 1);
39         last =  (stage == ncells);
40
41        if (stage >1) {
42           cprev = slice(3,stage-1);
43           z_sendrecv_solve(c, cprev);
44        }
45        z_solve_cell(first,last,c);
46      }
47
48//---------------------------------------------------------------------
49//     now perform backsubstitution in reverse direction
50//---------------------------------------------------------------------
51      for (stage = ncells; stage >= 1; stage--) {
52         c = slice(3,stage);
53         first = (stage == 1);
54         last =  (stage == ncells);
55
56         if (stage <ncells) {
57            cprev = slice(3,stage+1);
58            z_sendrecv_back(c, cprev);
59         }
60
61         z_backsubstitute(first,last,c);
62      }
63
64      return;
65}
66
67//---------------------------------------------------------------------
68//---------------------------------------------------------------------
69
70void z_unpack_solve_info(int c) {
71//---------------------------------------------------------------------
72//---------------------------------------------------------------------
73
74//---------------------------------------------------------------------
75//     unpack C'(-1) and rhs'(-1) for
76//     all i and j
77//---------------------------------------------------------------------
78
79      int i,j,m,n,ptr,kstart ;
80
81      kstart = 0;
82      ptr = 0;
83      for (j = 0; j <= JMAX-1; j++) {
84         for (i = 0; i <= IMAX-1; i++) {
85            for (m = 1; m <= BLOCK_SIZE; m++) {
86               for (n = 1; n <= BLOCK_SIZE; n++) {
87                  lhsc(m,n,i,j,kstart-1,c) = out_buffer(ptr+n);
88               }
89               ptr = ptr+BLOCK_SIZE;
90            }
91            for (n = 1; n <= BLOCK_SIZE; n++) {
92               rhs(n,i,j,kstart-1,c) = out_buffer(ptr+n);
93            }
94            ptr = ptr+BLOCK_SIZE;
95         }
96      }
97
98      return;
99}
100
101
102void z_sendrecv_solve(int c, int cprev) {
103
104//---------------------------------------------------------------------
105//     pack up and send C'(kend) and rhs'(kend) for
106//     all i and j
107//---------------------------------------------------------------------
108
109      int i,j,m,n,ksize,ptr,kstart;
110      int phase;
111      int error,buffer_size;
112
113      ksize = cell_size(3,cprev)-1;
114      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*
115           (BLOCK_SIZE*BLOCK_SIZE + BLOCK_SIZE);
116
117//---------------------------------------------------------------------
118//     pack up buffer
119//---------------------------------------------------------------------
120      ptr = 0;
121      for (j = 0; j <= JMAX-1; j++) {
122         for (i = 0; i <= IMAX-1; i++) {
123            for (m = 1; m <= BLOCK_SIZE; m++) {
124               for (n = 1; n <= BLOCK_SIZE; n++) {
125                  in_buffer(ptr+n) = lhsc(m,n,i,j,ksize,cprev);
126               }
127               ptr = ptr+BLOCK_SIZE;
128            }
129            for (n = 1; n <= BLOCK_SIZE; n++) {
130               in_buffer(ptr+n) = rhs(n,i,j,ksize,cprev);
131            }
132            ptr = ptr+BLOCK_SIZE;
133         }
134      }
135
136//---------------------------------------------------------------------
137//     send and receive buffer
138//---------------------------------------------------------------------
139
140      for (phase = 0; phase < 3; phase++) {
141
142        if (send_color[TOPDIR]==phase)
143          RCCE_send((char*)in_buffer, buffer_size*sizeof(double), successor(3));
144        if (recv_color[TOPDIR]==phase)
145          RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), predecessor(3));
146      }
147
148//---------------------------------------------------------------------
149//     unpack buffer
150//---------------------------------------------------------------------
151      kstart = 0;
152      ptr = 0;
153      for (j = 0; j <= JMAX-1; j++) {
154         for (i = 0; i <= IMAX-1; i++) {
155            for (m = 1; m <= BLOCK_SIZE; m++) {
156               for (n = 1; n <= BLOCK_SIZE; n++) {
157                  lhsc(m,n,i,j,kstart-1,c) = out_buffer(ptr+n);
158               }
159               ptr = ptr+BLOCK_SIZE;
160            }
161            for (n = 1; n <= BLOCK_SIZE; n++) {
162               rhs(n,i,j,kstart-1,c) = out_buffer(ptr+n);
163            }
164            ptr = ptr+BLOCK_SIZE;
165         }
166      }
167
168      return;
169}
170
171//---------------------------------------------------------------------
172//---------------------------------------------------------------------
173
174void z_sendrecv_back(int c, int cprev) {
175
176//---------------------------------------------------------------------
177//     pack up and send U(jstart) for all i and j
178//---------------------------------------------------------------------
179
180      int i,j,n,ptr,kstart;
181      int phase;
182      int error,buffer_size;
183
184//---------------------------------------------------------------------
185//     Send element 0 to previous processor
186//---------------------------------------------------------------------
187      kstart = 0;
188      buffer_size=MAX_CELL_DIM*MAX_CELL_DIM*BLOCK_SIZE;
189      ptr = 0;
190      for (j = 0; j <= JMAX-1; j++) {
191         for (i = 0; i <= IMAX-1; i++) {
192            for (n = 1; n <= BLOCK_SIZE; n++) {
193               in_buffer(ptr+n) = rhs(n,i,j,kstart,cprev);
194            }
195            ptr = ptr+BLOCK_SIZE;
196         }
197      }
198
199//---------------------------------------------------------------------
200//     send and receive buffer
201//---------------------------------------------------------------------
202
203      for (phase = 0; phase < 3; phase++) {
204
205        if (send_color[BOTTOMDIR]==phase)
206          RCCE_send((char*)in_buffer, buffer_size*sizeof(double), predecessor(3));
207        if (recv_color[BOTTOMDIR]==phase)
208          RCCE_recv((char*)out_buffer, buffer_size*sizeof(double), successor(3));
209      }
210
211//---------------------------------------------------------------------
212//     unpack U(ksize) for all i and j
213//---------------------------------------------------------------------
214
215      ptr = 0;
216      for (j = 0; j <= JMAX-1; j++) {
217         for (i = 0; i <= IMAX-1; i++) {
218            for (n = 1; n <= BLOCK_SIZE; n++) {
219               backsub_info(n,i,j,c) = out_buffer(ptr+n);
220            }
221            ptr = ptr+BLOCK_SIZE;
222         }
223      }
224
225      return;
226}
227
228
229//---------------------------------------------------------------------
230//---------------------------------------------------------------------
231
232void z_backsubstitute(int first, int last, int c) {
233
234//---------------------------------------------------------------------
235//---------------------------------------------------------------------
236
237//---------------------------------------------------------------------
238//     back solve: if last cell, then generate U(ksize)=rhs(ksize)
239//     else assume U(ksize) is loaded in un pack backsub_info
240//     so just use it
241//     after call u(kstart) will be sent to next cell
242//---------------------------------------------------------------------
243
244      int i, k;
245      int m,n,j,jsize,isize,ksize,kstart;
246
247      kstart = 0;
248      isize = cell_size(1,c)-end(1,c)-1      ;
249      jsize = cell_size(2,c)-end(2,c)-1;
250      ksize = cell_size(3,c)-1;
251      if (last == 0) {
252         for (j = start(2,c); j <= jsize; j++) {
253            for (i = start(1,c); i <= isize; i++) {
254//---------------------------------------------------------------------
255//     U(jsize) uses info from previous cell if not last cell
256//---------------------------------------------------------------------
257               for (m = 1; m <= BLOCK_SIZE; m++) {
258                  for (n = 1; n <= BLOCK_SIZE; n++) {
259                     rhs(m,i,j,ksize,c) = rhs(m,i,j,ksize,c)
260                          - lhsc(m,n,i,j,ksize,c)*
261                          backsub_info(n,i,j,c);
262                  }
263               }
264            }
265         }
266      }
267      for (k = ksize-1; k >= kstart; k--) {
268         for (j = start(2,c); j <= jsize; j++) {
269            for (i = start(1,c); i <= isize; i++) {
270               for (m = 1; m <= BLOCK_SIZE; m++) {
271                  for (n = 1; n <= BLOCK_SIZE; n++) {
272                     rhs(m,i,j,k,c) = rhs(m,i,j,k,c)
273                          - lhsc(m,n,i,j,k,c)*rhs(n,i,j,k+1,c);
274                  }
275               }
276            }
277         }
278      }
279
280      return;
281}
282
283//---------------------------------------------------------------------
284//---------------------------------------------------------------------
285
286void z_solve_cell(int first,int last,int c) {
287
288//---------------------------------------------------------------------
289//---------------------------------------------------------------------
290
291//---------------------------------------------------------------------
292//     performs guaussian elimination on this cell.
293//
294//     assumes that unpacking routines for non-first cells
295//     preload C' and rhs' from previous cell.
296//
297//     assumed send happens outside this routine, but that
298//     c'(KMAX) and rhs'(KMAX) will be sent to next cell.
299//---------------------------------------------------------------------
300
301      int i,j,k,isize,ksize,jsize,kstart;
302      double utmp[6*(KMAX+4)];
303#define utmp(m,i) utmp[(m-1)+6*(i+2)]
304
305      kstart = 0;
306      isize = cell_size(1,c)-end(1,c)-1;
307      jsize = cell_size(2,c)-end(2,c)-1;
308      ksize = cell_size(3,c)-1;
309
310      lhsabinit(lhsa, lhsb, ksize);
311
312      for (j = start(2,c); j <= jsize; j++) {
313         for (i = start(1,c); i <= isize; i++) {
314
315//---------------------------------------------------------------------
316//     This function computes the left hand side for the three z-factors
317//---------------------------------------------------------------------
318
319//---------------------------------------------------------------------
320//     Compute the indices for storing the block-diagonal matrix;
321//     determine c (labeled f) and s jacobians for cell c
322//---------------------------------------------------------------------
323            for (k = start(3,c)-1; k <= cell_size(3,c)-end(3,c); k++) {
324               utmp(1,k) = 1.0e0 / u(1,i,j,k,c);
325               utmp(2,k) = u(2,i,j,k,c);
326               utmp(3,k) = u(3,i,j,k,c);
327               utmp(4,k) = u(4,i,j,k,c);
328               utmp(5,k) = u(5,i,j,k,c);
329               utmp(6,k) = qs(i,j,k,c);
330            }
331
332            for (k = start(3,c)-1; k <= cell_size(3,c)-end(3,c); k++) {
333
334               tmp1 = utmp(1,k);
335               tmp2 = tmp1 * tmp1;
336               tmp3 = tmp1 * tmp2;
337
338               fjac(1,1,k) = 0.0e+00;
339               fjac(1,2,k) = 0.0e+00;
340               fjac(1,3,k) = 0.0e+00;
341               fjac(1,4,k) = 1.0e+00;
342               fjac(1,5,k) = 0.0e+00;
343
344               fjac(2,1,k) = - ( utmp(2,k)*utmp(4,k) )
345                    * tmp2 ;
346               fjac(2,2,k) = utmp(4,k) * tmp1;
347               fjac(2,3,k) = 0.0e+00;
348               fjac(2,4,k) = utmp(2,k) * tmp1;
349               fjac(2,5,k) = 0.0e+00;
350
351               fjac(3,1,k) = - ( utmp(3,k)*utmp(4,k) )
352                    * tmp2 ;
353               fjac(3,2,k) = 0.0e+00;
354               fjac(3,3,k) = utmp(4,k) * tmp1;
355               fjac(3,4,k) = utmp(3,k) * tmp1;
356               fjac(3,5,k) = 0.0e+00;
357
358               fjac(4,1,k) = - (utmp(4,k)*utmp(4,k) * tmp2 )
359                    + c2 * utmp(6,k);
360               fjac(4,2,k) = - c2 *  utmp(2,k) * tmp1 ;
361               fjac(4,3,k) = - c2 *  utmp(3,k) * tmp1;
362               fjac(4,4,k) = ( 2.0e+00 - c2 )
363                    *  utmp(4,k) * tmp1 ;
364               fjac(4,5,k) = c2;
365
366               fjac(5,1,k) = ( c2 * 2.0e0 * utmp(6,k)
367                    - c1 * ( utmp(5,k) * tmp1 ) )
368                    * ( utmp(4,k) * tmp1 );
369               fjac(5,2,k) = - c2 * ( utmp(2,k)*utmp(4,k) )
370                    * tmp2 ;
371               fjac(5,3,k) = - c2 * ( utmp(3,k)*utmp(4,k) )
372                    * tmp2;
373               fjac(5,4,k) = c1 * ( utmp(5,k) * tmp1 )
374                    - c2 * ( utmp(6,k)
375                    + utmp(4,k)*utmp(4,k) * tmp2 );
376               fjac(5,5,k) = c1 * utmp(4,k) * tmp1;
377
378               njac(1,1,k) = 0.0e+00;
379               njac(1,2,k) = 0.0e+00;
380               njac(1,3,k) = 0.0e+00;
381               njac(1,4,k) = 0.0e+00;
382               njac(1,5,k) = 0.0e+00;
383
384               njac(2,1,k) = - c3c4 * tmp2 * utmp(2,k);
385               njac(2,2,k) =   c3c4 * tmp1;
386               njac(2,3,k) =   0.0e+00;
387               njac(2,4,k) =   0.0e+00;
388               njac(2,5,k) =   0.0e+00;
389
390               njac(3,1,k) = - c3c4 * tmp2 * utmp(3,k);
391               njac(3,2,k) =   0.0e+00;
392               njac(3,3,k) =   c3c4 * tmp1;
393               njac(3,4,k) =   0.0e+00;
394               njac(3,5,k) =   0.0e+00;
395
396               njac(4,1,k) = - con43 * c3c4 * tmp2 * utmp(4,k);
397               njac(4,2,k) =   0.0e+00;
398               njac(4,3,k) =   0.0e+00;
399               njac(4,4,k) =   con43 * c3 * c4 * tmp1;
400               njac(4,5,k) =   0.0e+00;
401
402               njac(5,1,k) = - (  c3c4
403                    - c1345 ) * tmp3 * SQR(utmp(2,k))
404                    - ( c3c4 - c1345 ) * tmp3 * SQR(utmp(3,k))
405                    - ( con43 * c3c4
406                    - c1345 ) * tmp3 * SQR(utmp(4,k))
407                    - c1345 * tmp2 * utmp(5,k);
408
409               njac(5,2,k) = (  c3c4 - c1345 ) * tmp2 * utmp(2,k);
410               njac(5,3,k) = (  c3c4 - c1345 ) * tmp2 * utmp(3,k);
411               njac(5,4,k) = ( con43 * c3c4
412                    - c1345 ) * tmp2 * utmp(4,k);
413               njac(5,5,k) = ( c1345 )* tmp1;
414
415
416            }
417
418//---------------------------------------------------------------------
419//     now joacobians set, so form left hand side in z direction
420//---------------------------------------------------------------------
421            for (k = start(3,c); k <= ksize-end(3,c); k++) {
422
423               tmp1 = dt * tz1;
424               tmp2 = dt * tz2;
425
426               lhsa(1,1,k) = - tmp2 * fjac(1,1,k-1)
427                    - tmp1 * njac(1,1,k-1)
428                    - tmp1 * dz1 ;
429               lhsa(1,2,k) = - tmp2 * fjac(1,2,k-1)
430                    - tmp1 * njac(1,2,k-1);
431               lhsa(1,3,k) = - tmp2 * fjac(1,3,k-1)
432                    - tmp1 * njac(1,3,k-1);
433               lhsa(1,4,k) = - tmp2 * fjac(1,4,k-1)
434                    - tmp1 * njac(1,4,k-1);
435               lhsa(1,5,k) = - tmp2 * fjac(1,5,k-1)
436                    - tmp1 * njac(1,5,k-1);
437
438               lhsa(2,1,k) = - tmp2 * fjac(2,1,k-1)
439                    - tmp1 * njac(2,1,k-1);
440               lhsa(2,2,k) = - tmp2 * fjac(2,2,k-1)
441                    - tmp1 * njac(2,2,k-1)
442                    - tmp1 * dz2;
443               lhsa(2,3,k) = - tmp2 * fjac(2,3,k-1)
444                    - tmp1 * njac(2,3,k-1);
445               lhsa(2,4,k) = - tmp2 * fjac(2,4,k-1)
446                    - tmp1 * njac(2,4,k-1);
447               lhsa(2,5,k) = - tmp2 * fjac(2,5,k-1)
448                    - tmp1 * njac(2,5,k-1);
449
450               lhsa(3,1,k) = - tmp2 * fjac(3,1,k-1)
451                    - tmp1 * njac(3,1,k-1);
452               lhsa(3,2,k) = - tmp2 * fjac(3,2,k-1)
453                    - tmp1 * njac(3,2,k-1);
454               lhsa(3,3,k) = - tmp2 * fjac(3,3,k-1)
455                    - tmp1 * njac(3,3,k-1)
456                    - tmp1 * dz3 ;
457               lhsa(3,4,k) = - tmp2 * fjac(3,4,k-1)
458                    - tmp1 * njac(3,4,k-1);
459               lhsa(3,5,k) = - tmp2 * fjac(3,5,k-1)
460                    - tmp1 * njac(3,5,k-1);
461
462               lhsa(4,1,k) = - tmp2 * fjac(4,1,k-1)
463                    - tmp1 * njac(4,1,k-1);
464               lhsa(4,2,k) = - tmp2 * fjac(4,2,k-1)
465                    - tmp1 * njac(4,2,k-1);
466               lhsa(4,3,k) = - tmp2 * fjac(4,3,k-1)
467                    - tmp1 * njac(4,3,k-1);
468               lhsa(4,4,k) = - tmp2 * fjac(4,4,k-1)
469                    - tmp1 * njac(4,4,k-1)
470                    - tmp1 * dz4;
471               lhsa(4,5,k) = - tmp2 * fjac(4,5,k-1)
472                    - tmp1 * njac(4,5,k-1);
473
474               lhsa(5,1,k) = - tmp2 * fjac(5,1,k-1)
475                    - tmp1 * njac(5,1,k-1);
476               lhsa(5,2,k) = - tmp2 * fjac(5,2,k-1)
477                    - tmp1 * njac(5,2,k-1);
478               lhsa(5,3,k) = - tmp2 * fjac(5,3,k-1)
479                    - tmp1 * njac(5,3,k-1);
480               lhsa(5,4,k) = - tmp2 * fjac(5,4,k-1)
481                    - tmp1 * njac(5,4,k-1);
482               lhsa(5,5,k) = - tmp2 * fjac(5,5,k-1)
483                    - tmp1 * njac(5,5,k-1)
484                    - tmp1 * dz5;
485
486               lhsb(1,1,k) = 1.0e+00
487                    + tmp1 * 2.0e+00 * njac(1,1,k)
488                    + tmp1 * 2.0e+00 * dz1;
489               lhsb(1,2,k) = tmp1 * 2.0e+00 * njac(1,2,k);
490               lhsb(1,3,k) = tmp1 * 2.0e+00 * njac(1,3,k);
491               lhsb(1,4,k) = tmp1 * 2.0e+00 * njac(1,4,k);
492               lhsb(1,5,k) = tmp1 * 2.0e+00 * njac(1,5,k);
493
494               lhsb(2,1,k) = tmp1 * 2.0e+00 * njac(2,1,k);
495               lhsb(2,2,k) = 1.0e+00
496                    + tmp1 * 2.0e+00 * njac(2,2,k)
497                    + tmp1 * 2.0e+00 * dz2;
498               lhsb(2,3,k) = tmp1 * 2.0e+00 * njac(2,3,k);
499               lhsb(2,4,k) = tmp1 * 2.0e+00 * njac(2,4,k);
500               lhsb(2,5,k) = tmp1 * 2.0e+00 * njac(2,5,k);
501
502               lhsb(3,1,k) = tmp1 * 2.0e+00 * njac(3,1,k);
503               lhsb(3,2,k) = tmp1 * 2.0e+00 * njac(3,2,k);
504               lhsb(3,3,k) = 1.0e+00
505                    + tmp1 * 2.0e+00 * njac(3,3,k)
506                    + tmp1 * 2.0e+00 * dz3;
507               lhsb(3,4,k) = tmp1 * 2.0e+00 * njac(3,4,k);
508               lhsb(3,5,k) = tmp1 * 2.0e+00 * njac(3,5,k);
509
510               lhsb(4,1,k) = tmp1 * 2.0e+00 * njac(4,1,k);
511               lhsb(4,2,k) = tmp1 * 2.0e+00 * njac(4,2,k);
512               lhsb(4,3,k) = tmp1 * 2.0e+00 * njac(4,3,k);
513               lhsb(4,4,k) = 1.0e+00
514                    + tmp1 * 2.0e+00 * njac(4,4,k)
515                    + tmp1 * 2.0e+00 * dz4;
516               lhsb(4,5,k) = tmp1 * 2.0e+00 * njac(4,5,k);
517
518               lhsb(5,1,k) = tmp1 * 2.0e+00 * njac(5,1,k);
519               lhsb(5,2,k) = tmp1 * 2.0e+00 * njac(5,2,k);
520               lhsb(5,3,k) = tmp1 * 2.0e+00 * njac(5,3,k);
521               lhsb(5,4,k) = tmp1 * 2.0e+00 * njac(5,4,k);
522               lhsb(5,5,k) = 1.0e+00
523                    + tmp1 * 2.0e+00 * njac(5,5,k)
524                    + tmp1 * 2.0e+00 * dz5;
525
526               lhsc(1,1,i,j,k,c) =  tmp2 * fjac(1,1,k+1)
527                    - tmp1 * njac(1,1,k+1)
528                    - tmp1 * dz1;
529               lhsc(1,2,i,j,k,c) =  tmp2 * fjac(1,2,k+1)
530                    - tmp1 * njac(1,2,k+1);
531               lhsc(1,3,i,j,k,c) =  tmp2 * fjac(1,3,k+1)
532                    - tmp1 * njac(1,3,k+1);
533               lhsc(1,4,i,j,k,c) =  tmp2 * fjac(1,4,k+1)
534                    - tmp1 * njac(1,4,k+1);
535               lhsc(1,5,i,j,k,c) =  tmp2 * fjac(1,5,k+1)
536                    - tmp1 * njac(1,5,k+1);
537
538               lhsc(2,1,i,j,k,c) =  tmp2 * fjac(2,1,k+1)
539                    - tmp1 * njac(2,1,k+1);
540               lhsc(2,2,i,j,k,c) =  tmp2 * fjac(2,2,k+1)
541                    - tmp1 * njac(2,2,k+1)
542                    - tmp1 * dz2;
543               lhsc(2,3,i,j,k,c) =  tmp2 * fjac(2,3,k+1)
544                    - tmp1 * njac(2,3,k+1);
545               lhsc(2,4,i,j,k,c) =  tmp2 * fjac(2,4,k+1)
546                    - tmp1 * njac(2,4,k+1);
547               lhsc(2,5,i,j,k,c) =  tmp2 * fjac(2,5,k+1)
548                    - tmp1 * njac(2,5,k+1);
549
550               lhsc(3,1,i,j,k,c) =  tmp2 * fjac(3,1,k+1)
551                    - tmp1 * njac(3,1,k+1);
552               lhsc(3,2,i,j,k,c) =  tmp2 * fjac(3,2,k+1)
553                    - tmp1 * njac(3,2,k+1);
554               lhsc(3,3,i,j,k,c) =  tmp2 * fjac(3,3,k+1)
555                    - tmp1 * njac(3,3,k+1)
556                    - tmp1 * dz3;
557               lhsc(3,4,i,j,k,c) =  tmp2 * fjac(3,4,k+1)
558                    - tmp1 * njac(3,4,k+1);
559               lhsc(3,5,i,j,k,c) =  tmp2 * fjac(3,5,k+1)
560                    - tmp1 * njac(3,5,k+1);
561
562               lhsc(4,1,i,j,k,c) =  tmp2 * fjac(4,1,k+1)
563                    - tmp1 * njac(4,1,k+1);
564               lhsc(4,2,i,j,k,c) =  tmp2 * fjac(4,2,k+1)
565                    - tmp1 * njac(4,2,k+1);
566               lhsc(4,3,i,j,k,c) =  tmp2 * fjac(4,3,k+1)
567                    - tmp1 * njac(4,3,k+1);
568               lhsc(4,4,i,j,k,c) =  tmp2 * fjac(4,4,k+1)
569                    - tmp1 * njac(4,4,k+1)
570                    - tmp1 * dz4;
571               lhsc(4,5,i,j,k,c) =  tmp2 * fjac(4,5,k+1)
572                    - tmp1 * njac(4,5,k+1);
573
574               lhsc(5,1,i,j,k,c) =  tmp2 * fjac(5,1,k+1)
575                    - tmp1 * njac(5,1,k+1);
576               lhsc(5,2,i,j,k,c) =  tmp2 * fjac(5,2,k+1)
577                    - tmp1 * njac(5,2,k+1);
578               lhsc(5,3,i,j,k,c) =  tmp2 * fjac(5,3,k+1)
579                    - tmp1 * njac(5,3,k+1);
580               lhsc(5,4,i,j,k,c) =  tmp2 * fjac(5,4,k+1)
581                    - tmp1 * njac(5,4,k+1);
582               lhsc(5,5,i,j,k,c) =  tmp2 * fjac(5,5,k+1)
583                    - tmp1 * njac(5,5,k+1)
584                    - tmp1 * dz5;
585
586            }
587
588
589//---------------------------------------------------------------------
590//     outer most do loops - sweeping in i direction
591//---------------------------------------------------------------------
592            if (first == 1) {
593
594//---------------------------------------------------------------------
595//     multiply c(i,j,kstart) by b_inverse and copy back to c
596//     multiply rhs(kstart) by b_inverse(kstart) and copy to rhs
597//---------------------------------------------------------------------
598               binvcrhs( &lhsb(1,1,kstart),
599                              &lhsc(1,1,i,j,kstart,c),
600                              &rhs(1,i,j,kstart,c) );
601
602            }
603
604//---------------------------------------------------------------------
605//     begin inner most do loop
606//     do all the elements of the cell unless last
607//---------------------------------------------------------------------
608            for (k = kstart+first; k <= ksize-last; k++) {
609
610//---------------------------------------------------------------------
611//     subtract A*lhs_vector(k-1) from lhs_vector(k)
612//
613//     rhs(k) = rhs(k) - A*rhs(k-1)
614//---------------------------------------------------------------------
615               matvec_sub(&lhsa(1,1,k),
616                               &rhs(1,i,j,k-1,c),&rhs(1,i,j,k,c));
617
618//---------------------------------------------------------------------
619//     B(k) = B(k) - C(k-1)*A(k)
620//     call matmul_sub(aa,i,j,k,c,cc,i,j,k-1,c,bb,i,j,k,c)
621//---------------------------------------------------------------------
622               matmul_sub(&lhsa(1,1,k),
623                               &lhsc(1,1,i,j,k-1,c),
624                               &lhsb(1,1,k));
625
626//---------------------------------------------------------------------
627//     multiply c(i,j,k) by b_inverse and copy back to c
628//     multiply rhs(i,j,1) by b_inverse(i,j,1) and copy to rhs
629//---------------------------------------------------------------------
630               binvcrhs( &lhsb(1,1,k),
631                              &lhsc(1,1,i,j,k,c),
632                              &rhs(1,i,j,k,c) );
633
634            }
635
636//---------------------------------------------------------------------
637//     Now finish up special cases for last cell
638//---------------------------------------------------------------------
639            if (last == 1) {
640
641//---------------------------------------------------------------------
642//     rhs(ksize) = rhs(ksize) - A*rhs(ksize-1)
643//---------------------------------------------------------------------
644               matvec_sub(&lhsa(1,1,ksize),
645                               &rhs(1,i,j,ksize-1,c),&rhs(1,i,j,ksize,c));
646
647//---------------------------------------------------------------------
648//     B(ksize) = B(ksize) - C(ksize-1)*A(ksize)
649//     call matmul_sub(aa,i,j,ksize,c,
650//     $              cc,i,j,ksize-1,c,bb,i,j,ksize,c)
651//---------------------------------------------------------------------
652               matmul_sub(&lhsa(1,1,ksize),
653                               &lhsc(1,1,i,j,ksize-1,c),
654                               &lhsb(1,1,ksize));
655
656//---------------------------------------------------------------------
657//     multiply rhs(ksize) by b_inverse(ksize) and copy to rhs
658//---------------------------------------------------------------------
659               binvrhs( &lhsb(1,1,ksize),
660                             &rhs(1,i,j,ksize,c) );
661
662            }
663         }
664      }
665
666
667      return;
668}
669
670
671
672
673
674
675