1//--------------------------------------------------------------------- 2//--------------------------------------------------------------------- 3// 4// header.h 5// 6//--------------------------------------------------------------------- 7//--------------------------------------------------------------------- 8#ifndef __HEADER_H 9#define __HEADER_H 10 11//--------------------------------------------------------------------- 12// The following include file is generated automatically by the 13// "setparams" utility. It defines 14// maxcells: the square root of the maximum number of processors 15// problem_size: 12, 64, 102, 162 (for class T, A, B, C) 16// dt_default: default time step for this problem size if no 17// config file 18// niter_default: default number of iterations for this problem size 19//--------------------------------------------------------------------- 20 21#include "npbparams.h" 22#include "RCCE.h" 23//we introduce the next definition to avoid confusing the compiler, which 24//sometimes thinks the variable class is a reserved word 25#define class _class_ 26#include "common.h" 27 28#define AA 0 29#define BB 1 30#define CC 2 31#define BLOCK_SIZE 5 32 33#define EAST 2000 34#define WEST 3000 35#define NORTH 4000 36#define SOUTH 5000 37#define BOTTOM 6000 38#define TOP 7000 39 40#define WESTDIR 0 41#define EASTDIR 1 42#define SOUTHDIR 2 43#define NORTHDIR 3 44#define BOTTOMDIR 4 45#define TOPDIR 5 46 47#define MAX_CELL_DIM ((PROBLEM_SIZE/MAXCELLS)+1) 48#define IMAX MAX_CELL_DIM 49#define JMAX MAX_CELL_DIM 50#define KMAX MAX_CELL_DIM 51 52#define BUF_SIZE (MAX_CELL_DIM*MAX_CELL_DIM*(MAXCELLS-1)*60+1) 53 54#define SQR(x) (x)*(x) 55 56#define grid_points(m) grid_points[m-1] 57#define ce(m,n) ce[(m-1)+5*(n-1)] 58#define cell_coord(m,n) cell_coord[(m-1)+3*(n-1)] 59#define cell_low(m,n) cell_low[(m-1)+3*(n-1)] 60#define cell_high(m,n) cell_high[(m-1)+3*(n-1)] 61#define cell_size(m,n) cell_size[(m-1)+3*(n-1)] 62#define predecessor(m) predecessor[m-1] 63#define slice(m,n) slice[(m-1)+3*(n-1)] 64#define grid_size(m) grid_size[m-1] 65#define successor(m) successor[m-1] 66#define start(m,n) start[(m-1)+3*(n-1)] 67#define end(m,n) end[(m-1)+3*(n-1)] 68#define us(i,j,k,c) us[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))] 69#define vs(i,j,k,c) vs[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))] 70#define ws(i,j,k,c) ws[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))] 71#define qs(i,j,k,c) qs[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))] 72#define rho_i(i,j,k,c) rho_i[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))] 73#define square(i,j,k,c) square[(i+1)+(IMAX+2)*((j+1)+(JMAX+2)*((k+1)+(KMAX+2)*(c-1)))] 74#define forcing(m,i,j,k,c) forcing[(m-1)+5*(i+IMAX*(j+JMAX*(k+KMAX*(c-1))))] 75#define u(m,i,j,k,c) u[(m-1)+5*((i+2)+(IMAX+4)*((j+2)+(JMAX+4)*((k+2)+(KMAX+4)*(c-1))))] 76#define rhs(m,i,j,k,c) rhs[(m-1)+5*((i+1)+(IMAX+1)*((j+1)+(JMAX+1)*((k+1)+(KMAX+1)*(c-1))))] 77#define lhsc(m,n,i,j,k,c) lhsc[(m-1)+5*((n-1)+5*((i+1)+(IMAX+1)*((j+1)+(JMAX+1)*((k+1)+(KMAX+1)*(c-1)))))] 78#define backsub_info(m,i,j,c) backsub_info[(m-1)+5*((i)+(IMAX+1)*((j)+(JMAX+1)*(c-1)))] 79#define in_buffer(i) in_buffer[i-1] 80#define out_buffer(i) out_buffer[i-1] 81#define cv(m) cv[m+2] 82#define rhon(m) rhon[m+2] 83#define rhos(m) rhos[m+2] 84#define rhoq(m) rhoq[m+2] 85#define cuf(m) cuf[m+2] 86#define q(m) q[m+2] 87#define ue(m,n) ue[(m+2)+(MAX_CELL_DIM+4)*(n-1)] 88#define buf(m,n) buf[(m+2)+(MAX_CELL_DIM+4)*(n-1)] 89#define sum(m) sum[m-1] 90#define xce_sub(m) xce_sub[m-1] 91 92 93#ifdef G_MAIN 94 int ncells, grid_points[3]; 95 double elapsed_time; 96 97 double tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3, 98 dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4, 99 dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt, 100 ce[5*13], dxmax, dymax, dzmax, xxcon1, xxcon2, 101 xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1, 102 dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4, 103 yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1, 104 zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1, 105 dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1, 106 dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2, 107 c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt, 108 dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1, 109 c2dtty1, c2dttz1, comz1, comz4, comz5, comz6, 110 c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16; 111 112 int cell_coord[MAXCELLS*3], cell_low[MAXCELLS*3], 113 cell_high[MAXCELLS*3], cell_size[MAXCELLS*3], 114 predecessor[3], slice[MAXCELLS*3], 115 grid_size[3], successor[3], 116 start[MAXCELLS*3], end[MAXCELLS*3]; 117 118 double 119 us [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS], 120 vs [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS], 121 ws [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS], 122 qs [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS], 123 rho_i [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS], 124 square [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS], 125 forcing [5*IMAX*JMAX*KMAX*MAXCELLS], 126 u [5*(IMAX+4)*(JMAX+4)*(KMAX+4)*MAXCELLS], 127 rhs [5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS], 128 lhsc [5*5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS], 129 backsub_info [5*(MAX_CELL_DIM+1)*(MAX_CELL_DIM+1)*MAXCELLS], 130 in_buffer[BUF_SIZE], out_buffer[BUF_SIZE]; 131 132 double cv[MAX_CELL_DIM+4], rhon[MAX_CELL_DIM+4], 133 rhos[MAX_CELL_DIM+4], rhoq[MAX_CELL_DIM+4], 134 cuf[MAX_CELL_DIM+4], q[MAX_CELL_DIM+4], 135 ue[(MAX_CELL_DIM+4)*5], buf[(MAX_CELL_DIM+4)*5]; 136 137 int west_size, east_size, bottom_size, top_size, 138 north_size, south_size, start_send_west, 139 start_send_east, start_send_south, start_send_north, 140 start_send_bottom, start_send_top, start_recv_west, 141 start_recv_east, start_recv_south, start_recv_north, 142 start_recv_bottom, start_recv_top; 143// 144// These are used by btio 145// 146 int collbuf_nodes, collbuf_size, iosize, 147 idump, record_length, 148 idump_sub, rd_interval; 149 double sum[NITER_DEFAULT], xce_sub[5]; 150 long int iseek; 151 int send_color[6], recv_color[6]; 152#else 153extern int ncells, grid_points[3]; 154extern double elapsed_time; 155 156extern double tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3, 157 dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4, 158 dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt, 159 ce[5*13], dxmax, dymax, dzmax, xxcon1, xxcon2, 160 xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1, 161 dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4, 162 yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1, 163 zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1, 164 dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1, 165 dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2, 166 c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt, 167 dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1, 168 c2dtty1, c2dttz1, comz1, comz4, comz5, comz6, 169 c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16; 170 171extern int cell_coord[MAXCELLS*3], cell_low[MAXCELLS*3], 172 cell_high[MAXCELLS*3], cell_size[MAXCELLS*3], 173 predecessor[3], slice[MAXCELLS*3], 174 grid_size[3], successor[3], 175 start[MAXCELLS*3], end[MAXCELLS*3]; 176 177extern double 178 us [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS], 179 vs [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS], 180 ws [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS], 181 qs [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS], 182 rho_i [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS], 183 square [(IMAX+2)*(JMAX+2)*(KMAX+2)*MAXCELLS], 184 forcing [5*IMAX*JMAX*KMAX*MAXCELLS], 185 u [5*(IMAX+4)*(JMAX+4)*(KMAX+4)*MAXCELLS], 186 rhs [5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS], 187 lhsc [5*5*(IMAX+1)*(JMAX+1)*(KMAX+1)*MAXCELLS], 188 backsub_info [5*(MAX_CELL_DIM+1)*(MAX_CELL_DIM+1)*MAXCELLS], 189 in_buffer[BUF_SIZE], out_buffer[BUF_SIZE]; 190 191extern double cv[MAX_CELL_DIM+4], rhon[MAX_CELL_DIM+4], 192 rhos[MAX_CELL_DIM+4], rhoq[MAX_CELL_DIM+4], 193 cuf[MAX_CELL_DIM+4], q[MAX_CELL_DIM+4], 194 ue[(MAX_CELL_DIM+4)*5], buf[(MAX_CELL_DIM+4)*5]; 195 196extern int west_size, east_size, bottom_size, top_size, 197 north_size, south_size, start_send_west, 198 start_send_east, start_send_south, start_send_north, 199 start_send_bottom, start_send_top, start_recv_west, 200 start_recv_east, start_recv_south, start_recv_north, 201 start_recv_bottom, start_recv_top; 202 203// 204// These are used by btio 205// 206extern int collbuf_nodes, collbuf_size, iosize, 207 idump, record_length, 208 idump_sub, rd_interval; 209extern double sum[NITER_DEFAULT], xce_sub[5]; 210extern long int iseek; 211extern int send_color[6], recv_color[6]; 212 213#endif /*G_MAIN*/ 214 215extern void matvec_sub(double ablock[], double avec[], double bvec[]); 216extern void matmul_sub(double ablock[], double bblock[], double cblock[]); 217extern void binvcrhs( double lhs[], double c[], double r[] ); 218extern void binvrhs( double lhs[], double r[] ); 219extern void exact_solution(double xi,double eta,double zeta,double dtemp[]); 220 221extern int setup_mpi(int *argc, char ***argv); 222extern void make_set(void); 223extern void set_constants(void); 224extern void lhsinit(void); 225extern void lhsabinit(double lhsa[], double lhsb[], int size); 226extern void initialize(void); 227extern void exact_rhs(void); 228extern void compute_buffer_size(int c); 229extern void adi(void); 230extern void compute_rhs(void); 231extern void copy_faces(void); 232extern void x_solve(void); 233extern void y_solve(void); 234extern void z_solve(void); 235extern void add(void); 236extern void verify(int niter, char *class, int *verified); 237extern void error_norm(double rms[]); 238extern void rhs_norm(double rms[]); 239 240extern void setup_btio(void); 241extern void output_timestep(void); 242extern void btio_cleanup(void); 243extern void btio_verify(int *verified); 244extern void accumulate_norms(double xce[]); 245extern void clear_timestep(void); 246 247#endif 248 249#ifdef _OPENMP 250#pragma omp threadprivate (cell_coord, cell_low, cell_high, cell_size) 251#pragma omp threadprivate (predecessor, slice, grid_size, successor) 252#pragma omp threadprivate (start, end) 253 254#pragma omp threadprivate (ncells, grid_points, elapsed_time) 255#pragma omp threadprivate (tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3, \ 256 dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4, \ 257 dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt, \ 258 ce, dxmax, dymax, dzmax, xxcon1, xxcon2, \ 259 xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1, \ 260 dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4, \ 261 yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1, \ 262 zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1, \ 263 dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1, \ 264 dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2, \ 265 c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt, \ 266 dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1, \ 267 c2dtty1, c2dttz1, comz1, comz4, comz5, comz6, \ 268 c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16) 269 270#pragma omp threadprivate (us, vs, ws, qs, rho_i, square, forcing, \ 271 u, rhs, lhsc, backsub_info, in_buffer, out_buffer) 272 273#pragma omp threadprivate (cv, rhon, rhos, rhoq, cuf, q, ue, buf) 274 275#pragma omp threadprivate (west_size, east_size, bottom_size, top_size, \ 276 north_size, south_size, start_send_west, \ 277 start_send_east, start_send_south, start_send_north, \ 278 start_send_bottom, start_send_top, start_recv_west, \ 279 start_recv_east, start_recv_south, start_recv_north, \ 280 start_recv_bottom, start_recv_top, send_color, recv_color) 281// 282// These are used by btio 283// 284#pragma omp threadprivate (collbuf_nodes, collbuf_size, iosize, idump,\ 285 record_length, idump_sub, rd_interval, \ 286 sum, xce_sub, iseek) 287#endif 288