1//---------------------------------------------------------------------
2//---------------------------------------------------------------------
3//
4//  header.h
5//
6//---------------------------------------------------------------------
7//---------------------------------------------------------------------
8#ifndef __HEADER_H
9#define __HEADER_H
10
11//---------------------------------------------------------------------
12// The following include file is generated automatically by the
13// "setparams" utility. It defines
14//      maxcells:      the square root of the maximum number of processors
15//      problem_size:  12, 64, 102, 162 (for class T, A, B, C)
16//      dt_default:    default time step for this problem size if no
17//                     config file
18//      niter_default: default number of iterations for this problem size
19//---------------------------------------------------------------------
20
21#include "npbparams.h"
22#include "RCCE.h"
// The next definition renames the identifier `class`, because some compilers
// (e.g. C++ compilers) treat `class` as a reserved word.
25#define class _class_
26#include "common.h"
27
/* Small integer selectors with values 0, 1, 2, plus BLOCK_SIZE. */
#define AA          0
#define BB          1
#define CC          2
#define BLOCK_SIZE  5

/*
 * One distinct base value per face direction, spaced 1000 apart.
 * NOTE(review): presumably used as message tags -- confirm at the call sites.
 */
#define EAST        2000
#define WEST        3000
#define NORTH       4000
#define SOUTH       5000
#define BOTTOM      6000
#define TOP         7000

/* Zero-based index for each of the six face directions. */
#define WESTDIR     0
#define EASTDIR     1
#define SOUTHDIR    2
#define NORTHDIR    3
#define BOTTOMDIR   4
#define TOPDIR      5
46
/*
 * Per-axis cell extent: the integer quotient PROBLEM_SIZE / MAXCELLS plus one.
 * The three axis limits are all aliases of this same value.
 */
#define MAX_CELL_DIM (1 + (PROBLEM_SIZE/MAXCELLS))
#define KMAX MAX_CELL_DIM
#define JMAX MAX_CELL_DIM
#define IMAX MAX_CELL_DIM

/* Element count used to size in_buffer and out_buffer. */
#define BUF_SIZE (1 + MAX_CELL_DIM*MAX_CELL_DIM*(MAXCELLS-1)*60)
53
/*
 * Square of x.  The whole expansion is parenthesized so that the macro can be
 * used safely inside larger expressions (e.g. `a / SQR(b)`).
 * Note: x is evaluated twice, so do not pass an argument with side effects.
 */
#define SQR(x) ((x)*(x))
55
/*
 * 1-based accessors for the small tables of the same name.
 *
 * Each function-like macro shares its name with the array it indexes; a use
 * without an argument list still refers to the array itself.  Single-index
 * forms map m = 1 to element 0.  Two-index forms store m fastest, with a
 * leading extent of 3 (5 for ce).
 *
 * Every argument is parenthesized so that any expression (ternary, shift,
 * bitwise, ...) can be passed as an index.
 */
#define grid_points(m)   grid_points[(m)-1]
#define ce(m,n)          ce[((m)-1)+5*((n)-1)]
#define cell_coord(m,n)  cell_coord[((m)-1)+3*((n)-1)]
#define cell_low(m,n)    cell_low[((m)-1)+3*((n)-1)]
#define cell_high(m,n)   cell_high[((m)-1)+3*((n)-1)]
#define cell_size(m,n)   cell_size[((m)-1)+3*((n)-1)]
#define predecessor(m)   predecessor[(m)-1]
#define slice(m,n)       slice[((m)-1)+3*((n)-1)]
#define grid_size(m)     grid_size[(m)-1]
#define successor(m)     successor[(m)-1]
#define start(m,n)       start[((m)-1)+3*((n)-1)]
#define end(m,n)         end[((m)-1)+3*((n)-1)]
/*
 * Flattened multi-dimensional accessors for the field arrays of the same
 * name.  The leftmost index varies fastest and the cell number c is 1-based.
 *
 *   us, vs, ws, qs, rho_i, square : i, j, k are shifted by +1 and each axis
 *                                   has extent (xMAX+2).
 *   forcing                       : component m is 1-based; i, j, k are
 *                                   unshifted with extent xMAX.
 *   u                             : component m is 1-based; i, j, k are
 *                                   shifted by +2 with extent (xMAX+4).
 *   rhs, lhsc                     : components are 1-based; i, j, k are
 *                                   shifted by +1 with extent (xMAX+1).
 *   backsub_info                  : component m is 1-based; i, j are unshifted
 *                                   with extent (xMAX+1).
 *
 * Every argument is parenthesized so that arbitrary expressions can be used
 * as indices without being re-associated by the surrounding arithmetic.
 */
#define us(i,j,k,c) us[((i)+1)+(IMAX+2)*(((j)+1)+(JMAX+2)*(((k)+1)+(KMAX+2)*((c)-1)))]
#define vs(i,j,k,c) vs[((i)+1)+(IMAX+2)*(((j)+1)+(JMAX+2)*(((k)+1)+(KMAX+2)*((c)-1)))]
#define ws(i,j,k,c) ws[((i)+1)+(IMAX+2)*(((j)+1)+(JMAX+2)*(((k)+1)+(KMAX+2)*((c)-1)))]
#define qs(i,j,k,c) qs[((i)+1)+(IMAX+2)*(((j)+1)+(JMAX+2)*(((k)+1)+(KMAX+2)*((c)-1)))]
#define rho_i(i,j,k,c) rho_i[((i)+1)+(IMAX+2)*(((j)+1)+(JMAX+2)*(((k)+1)+(KMAX+2)*((c)-1)))]
#define square(i,j,k,c) square[((i)+1)+(IMAX+2)*(((j)+1)+(JMAX+2)*(((k)+1)+(KMAX+2)*((c)-1)))]
#define forcing(m,i,j,k,c) forcing[((m)-1)+5*((i)+IMAX*((j)+JMAX*((k)+KMAX*((c)-1))))]
#define u(m,i,j,k,c) u[((m)-1)+5*(((i)+2)+(IMAX+4)*(((j)+2)+(JMAX+4)*(((k)+2)+(KMAX+4)*((c)-1))))]
#define rhs(m,i,j,k,c) rhs[((m)-1)+5*(((i)+1)+(IMAX+1)*(((j)+1)+(JMAX+1)*(((k)+1)+(KMAX+1)*((c)-1))))]
#define lhsc(m,n,i,j,k,c) lhsc[((m)-1)+5*(((n)-1)+5*(((i)+1)+(IMAX+1)*(((j)+1)+(JMAX+1)*(((k)+1)+(KMAX+1)*((c)-1)))))]
#define backsub_info(m,i,j,c) backsub_info[((m)-1)+5*((i)+(IMAX+1)*((j)+(JMAX+1)*((c)-1)))]
/*
 * Accessors for the communication buffers and the one-dimensional work
 * arrays of the same name.
 *
 *   in_buffer, out_buffer, sum, xce_sub : 1-based (i = 1 is element 0).
 *   cv, rhon, rhos, rhoq, cuf, q        : index shifted by +2 (m = -2 is
 *                                         element 0).
 *   ue, buf                             : m shifted by +2 with extent
 *                                         (MAX_CELL_DIM+4); n is 1-based.
 *
 * Every argument is parenthesized so that arbitrary expressions can be used
 * as indices.
 */
#define in_buffer(i)  in_buffer[(i)-1]
#define out_buffer(i) out_buffer[(i)-1]
#define cv(m)         cv[(m)+2]
#define rhon(m)       rhon[(m)+2]
#define rhos(m)       rhos[(m)+2]
#define rhoq(m)       rhoq[(m)+2]
#define cuf(m)        cuf[(m)+2]
#define q(m)          q[(m)+2]
#define ue(m,n)       ue[((m)+2)+(MAX_CELL_DIM+4)*((n)-1)]
#define buf(m,n)      buf[((m)+2)+(MAX_CELL_DIM+4)*((n)-1)]
#define sum(m)        sum[(m)-1]
#define xce_sub(m)    xce_sub[(m)-1]
91
92
93#ifdef G_MAIN
94      int     ncells, grid_points[3];
95      double  elapsed_time;
96
97      double  tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3,
98                        dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4,
99                        dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt,
100                        ce[5*13], dxmax, dymax, dzmax, xxcon1, xxcon2,
101                        xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1,
102                        dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4,
103                        yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1,
104                        zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1,
105                        dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1,
106                        dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2,
107                        c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt,
108                        dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1,
109                        c2dtty1, c2dttz1, comz1, comz4, comz5, comz6,
110                        c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16;
111
112      int     cell_coord[MAXCELLS*3], cell_low[MAXCELLS*3],
113              cell_high[MAXCELLS*3],  cell_size[MAXCELLS*3],
114              predecessor[3],         slice[MAXCELLS*3],
115              grid_size[3],           successor[3],
116              start[MAXCELLS*3],      end[MAXCELLS*3];
117
118      double
119         us      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
120         vs      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
121         ws      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
122         qs      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
123         rho_i   [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
124         square  [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
125         forcing [5*IMAX*JMAX*KMAX*MAXCELLS],
126         u       [5*(IMAX+4)*(JMAX+4)*(KMAX+4)*MAXCELLS],
127         rhs     [5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
128         lhsc    [5*5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
129         backsub_info [5*(MAX_CELL_DIM+1)*(MAX_CELL_DIM+1)*MAXCELLS],
130         in_buffer[BUF_SIZE], out_buffer[BUF_SIZE];
131
132      double cv[MAX_CELL_DIM+4],   rhon[MAX_CELL_DIM+4],
133             rhos[MAX_CELL_DIM+4], rhoq[MAX_CELL_DIM+4],
134             cuf[MAX_CELL_DIM+4],  q[MAX_CELL_DIM+4],
135             ue[(MAX_CELL_DIM+4)*5], buf[(MAX_CELL_DIM+4)*5];
136
137      int  west_size, east_size, bottom_size, top_size,
138               north_size, south_size, start_send_west,
139               start_send_east, start_send_south, start_send_north,
140               start_send_bottom, start_send_top, start_recv_west,
141               start_recv_east, start_recv_south, start_recv_north,
142               start_recv_bottom, start_recv_top;
143//
144//     These are used by btio
145//
146      int collbuf_nodes, collbuf_size, iosize,
147              idump, record_length,
148              idump_sub, rd_interval;
149      double sum[NITER_DEFAULT], xce_sub[5];
150      long int iseek;
151      int    send_color[6], recv_color[6];
152#else
153extern int     ncells, grid_points[3];
154extern double  elapsed_time;
155
156extern double  tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3,
157                        dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4,
158                        dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt,
159                        ce[5*13], dxmax, dymax, dzmax, xxcon1, xxcon2,
160                        xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1,
161                        dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4,
162                        yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1,
163                        zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1,
164                        dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1,
165                        dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2,
166                        c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt,
167                        dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1,
168                        c2dtty1, c2dttz1, comz1, comz4, comz5, comz6,
169                        c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16;
170
171extern int    cell_coord[MAXCELLS*3], cell_low[MAXCELLS*3],
172              cell_high[MAXCELLS*3],  cell_size[MAXCELLS*3],
173              predecessor[3],         slice[MAXCELLS*3],
174              grid_size[3],           successor[3],
175              start[MAXCELLS*3],      end[MAXCELLS*3];
176
177extern double
178         us      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
179         vs      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
180         ws      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
181         qs      [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
182         rho_i   [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
183         square  [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS],
184         forcing [5*IMAX*JMAX*KMAX*MAXCELLS],
185         u       [5*(IMAX+4)*(JMAX+4)*(KMAX+4)*MAXCELLS],
186         rhs     [5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
187         lhsc    [5*5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS],
188         backsub_info [5*(MAX_CELL_DIM+1)*(MAX_CELL_DIM+1)*MAXCELLS],
189         in_buffer[BUF_SIZE], out_buffer[BUF_SIZE];
190
191extern double cv[MAX_CELL_DIM+4],   rhon[MAX_CELL_DIM+4],
192             rhos[MAX_CELL_DIM+4], rhoq[MAX_CELL_DIM+4],
193             cuf[MAX_CELL_DIM+4],  q[MAX_CELL_DIM+4],
194             ue[(MAX_CELL_DIM+4)*5], buf[(MAX_CELL_DIM+4)*5];
195
196extern int  west_size, east_size, bottom_size, top_size,
197               north_size, south_size, start_send_west,
198               start_send_east, start_send_south, start_send_north,
199               start_send_bottom, start_send_top, start_recv_west,
200               start_recv_east, start_recv_south, start_recv_north,
201               start_recv_bottom, start_recv_top;
202
203//
204//     These are used by btio
205//
206extern int collbuf_nodes, collbuf_size, iosize,
207              idump, record_length,
208              idump_sub, rd_interval;
209extern double sum[NITER_DEFAULT], xce_sub[5];
210extern long int iseek;
211extern int    send_color[6], recv_color[6];
212
213#endif /*G_MAIN*/
214
/*
 * Block-level kernels and the exact-solution evaluator; all are defined in
 * other translation units.  Each pointer parameter addresses the first
 * element of a caller-supplied array of double.
 * NOTE(review): required array lengths are not visible here -- see the
 * definitions.
 */
void matvec_sub(double *ablock, double *avec, double *bvec);
void matmul_sub(double *ablock, double *bblock, double *cblock);
void binvcrhs(double *lhs, double *c, double *r);
void binvrhs(double *lhs, double *r);
void exact_solution(double xi, double eta, double zeta, double *dtemp);
220
/*
 * Set-up, solver and verification entry points; all are defined in other
 * translation units.
 */

/* Start-up and initialisation. */
int  setup_mpi(int *argc, char ***argv);
void make_set(void);
void set_constants(void);
void lhsinit(void);
void lhsabinit(double *lhsa, double *lhsb, int size);
void initialize(void);
void exact_rhs(void);
void compute_buffer_size(int c);

/* Solver steps. */
void adi(void);
void compute_rhs(void);
void copy_faces(void);
void x_solve(void);
void y_solve(void);
void z_solve(void);
void add(void);

/* Verification and norms. */
void verify(int niter, char *class, int *verified);
void error_norm(double *rms);
void rhs_norm(double *rms);
239
/*
 * Entry points defined in other translation units.
 * NOTE(review): the names suggest these belong to the btio (I/O) variant --
 * confirm against the definitions.
 */
void setup_btio(void);
void output_timestep(void);
void clear_timestep(void);
void accumulate_norms(double *xce);
void btio_verify(int *verified);
void btio_cleanup(void);
246
247#endif
248
249#ifdef _OPENMP
250#pragma omp threadprivate (cell_coord, cell_low, cell_high,  cell_size)
251#pragma omp threadprivate (predecessor, slice, grid_size, successor)
252#pragma omp threadprivate (start, end)
253
254#pragma omp threadprivate (ncells, grid_points, elapsed_time)
255#pragma omp threadprivate (tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3, \
256                           dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4, \
257                           dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt, \
258                           ce, dxmax, dymax, dzmax, xxcon1, xxcon2, \
259                           xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1, \
260                           dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4, \
261                           yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1, \
262                           zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1, \
263                           dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1, \
264                           dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2, \
265                           c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt, \
266                           dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1, \
267                           c2dtty1, c2dttz1, comz1, comz4, comz5, comz6, \
268                           c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16)
269
270#pragma omp threadprivate (us, vs, ws, qs, rho_i, square, forcing, \
271                           u, rhs, lhsc, backsub_info, in_buffer, out_buffer)
272
273#pragma omp threadprivate (cv, rhon, rhos, rhoq, cuf, q, ue, buf)
274
275#pragma omp threadprivate (west_size, east_size, bottom_size, top_size, \
276                           north_size, south_size, start_send_west, \
277                           start_send_east, start_send_south, start_send_north, \
278                           start_send_bottom, start_send_top, start_recv_west, \
279                           start_recv_east, start_recv_south, start_recv_north, \
280                           start_recv_bottom, start_recv_top, send_color, recv_color)
281//
282//     These are used by btio
283//
284#pragma omp threadprivate (collbuf_nodes, collbuf_size, iosize, idump,\
285                           record_length, idump_sub, rd_interval, \
286                           sum, xce_sub, iseek)
287#endif
288