1/*--------------------------------------------------------------------
2
3  NAS Parallel Benchmarks 2.3 OpenMP C versions - IS
4
5  This benchmark is an OpenMP C version of the NPB IS code.
6
7  The OpenMP C versions are developed by RWCP and derived from the serial
8  Fortran versions in "NPB 2.3-serial" developed by NAS.
9
10  Permission to use, copy, distribute and modify this software for any
11  purpose with or without fee is hereby granted.
12  This software is provided "as is" without express or implied warranty.
13
14  Send comments on the OpenMP C versions to pdp-openmp@rwcp.or.jp
15
16  Information on OpenMP activities at RWCP is available at:
17
18           http://pdplab.trc.rwcp.or.jp/pdperf/Omni/
19
20  Information on NAS Parallel Benchmarks 2.3 is available at:
21
22           http://www.nas.nasa.gov/NAS/NPB/
23
24--------------------------------------------------------------------*/
25/*--------------------------------------------------------------------
26
27  Author: M. Yarrow
28
29  OpenMP C version: S. Satoh
30
31--------------------------------------------------------------------*/
32
33#include "npb-C.h"
34#include "npbparams.h"
35#include <stdlib.h>
36#include <stdio.h>
37#include <stdint.h>
38#if defined(_OPENMP)
39#include <omp.h>
40#endif /* _OPENMP */
41
42#define STACK_SIZE (8 * 1024 * 1024)
43
44/*****************************************************************/
45/* For serial IS, buckets are not really req'd to solve NPB1 IS  */
46/* spec, but their use on some machines improves performance, on */
47/* other machines the use of buckets compromises performance,    */
48/* probably because it is extra computation which is not req'd.  */
49/* (Note: Mechanism not understood, probably cache related)      */
50/* Example:  SP2-66MhzWN:  50% speedup with buckets              */
51/* Example:  SGI Indy5000: 50% slowdown with buckets             */
52/* Example:  SGI O2000:   400% slowdown with buckets (Wow!)      */
53/*****************************************************************/
54/* #define USE_BUCKETS  */
55/* buckets are not used in the OpenMP C version */
56
57
58/******************/
59/* default values */
60/******************/
61#ifndef CLASS
62#define CLASS 'S'
63#endif
64
65
66/*************/
67/*  CLASS S  */
68/*************/
69#if CLASS == 'S'
70#define  TOTAL_KEYS_LOG_2    16
71#define  MAX_KEY_LOG_2       11
72#define  NUM_BUCKETS_LOG_2   9
73#endif
74
75
76/*************/
77/*  CLASS W  */
78/*************/
79#if CLASS == 'W'
80#define  TOTAL_KEYS_LOG_2    20
81#define  MAX_KEY_LOG_2       16
82#define  NUM_BUCKETS_LOG_2   10
83#endif
84
85/*************/
86/*  CLASS A  */
87/*************/
88#if CLASS == 'A'
89#define  TOTAL_KEYS_LOG_2    23
90#define  MAX_KEY_LOG_2       19
91#define  NUM_BUCKETS_LOG_2   10
92#endif
93
94
95/*************/
96/*  CLASS B  */
97/*************/
98#if CLASS == 'B'
99#define  TOTAL_KEYS_LOG_2    25
100#define  MAX_KEY_LOG_2       21
101#define  NUM_BUCKETS_LOG_2   10
102#endif
103
104
105/*************/
106/*  CLASS C  */
107/*************/
108#if CLASS == 'C'
109#define  TOTAL_KEYS_LOG_2    27
110#define  MAX_KEY_LOG_2       23
111#define  NUM_BUCKETS_LOG_2   10
112#endif
113
114
115#define  TOTAL_KEYS          (1 << TOTAL_KEYS_LOG_2)
116#define  MAX_KEY             (1 << MAX_KEY_LOG_2)
117#define  NUM_BUCKETS         (1 << NUM_BUCKETS_LOG_2)
118#define  NUM_KEYS            TOTAL_KEYS
119#define  SIZE_OF_BUFFERS     NUM_KEYS
120
121
122#define  MAX_ITERATIONS      10
123#define  TEST_ARRAY_SIZE     5
124
125
126/*************************************/
127/* Typedef: if necessary, change the */
128/* size of int here by changing the  */
129/* int type to, say, long            */
130/*************************************/
131typedef  int  INT_TYPE;
132
133
134/********************/
135/* Some global info */
136/********************/
137INT_TYPE *key_buff_ptr_global;         /* used by full_verify to get */
138                                       /* copies of rank info        */
139
140int      passed_verification;
141
142
143/************************************/
144/* These are the three main arrays. */
145/* See SIZE_OF_BUFFERS def above    */
146/************************************/
147INT_TYPE key_array[SIZE_OF_BUFFERS],
148         key_buff1[SIZE_OF_BUFFERS],
149         key_buff2[SIZE_OF_BUFFERS],
150         partial_verify_vals[TEST_ARRAY_SIZE];
151
152#ifdef USE_BUCKETS
153INT_TYPE bucket_size[NUM_BUCKETS],
154         bucket_ptrs[NUM_BUCKETS];
155#endif
156
157
158/**********************/
159/* Partial verif info */
160/**********************/
161INT_TYPE test_index_array[TEST_ARRAY_SIZE],
162         test_rank_array[TEST_ARRAY_SIZE],
163
164         S_test_index_array[TEST_ARRAY_SIZE] =
165                             {48427,17148,23627,62548,4431},
166         S_test_rank_array[TEST_ARRAY_SIZE] =
167                             {0,18,346,64917,65463},
168
169         W_test_index_array[TEST_ARRAY_SIZE] =
170                             {357773,934767,875723,898999,404505},
171         W_test_rank_array[TEST_ARRAY_SIZE] =
172                             {1249,11698,1039987,1043896,1048018},
173
174         A_test_index_array[TEST_ARRAY_SIZE] =
175                             {2112377,662041,5336171,3642833,4250760},
176         A_test_rank_array[TEST_ARRAY_SIZE] =
177                             {104,17523,123928,8288932,8388264},
178
179         B_test_index_array[TEST_ARRAY_SIZE] =
180                             {41869,812306,5102857,18232239,26860214},
181         B_test_rank_array[TEST_ARRAY_SIZE] =
182                             {33422937,10244,59149,33135281,99},
183
184         C_test_index_array[TEST_ARRAY_SIZE] =
185                             {44172927,72999161,74326391,129606274,21736814},
186         C_test_rank_array[TEST_ARRAY_SIZE] =
187                             {61147,882988,266290,133997595,133525895};
188
189
190
191/***********************/
192/* function prototypes */
193/***********************/
194double	is_randlc( double *X, double *A );
195
196void full_verify( void );
197
198/*
199 *    FUNCTION RANDLC (X, A)
200 *
201 *  This routine returns a uniform pseudorandom double precision number in the
202 *  range (0, 1) by using the linear congruential generator
203 *
204 *  x_{k+1} = a x_k  (mod 2^46)
205 *
206 *  where 0 < x_k < 2^46 and 0 < a < 2^46.  This scheme generates 2^44 numbers
207 *  before repeating.  The argument A is the same as 'a' in the above formula,
208 *  and X is the same as x_0.  A and X must be odd double precision integers
209 *  in the range (1, 2^46).  The returned value RANDLC is normalized to be
210 *  between 0 and 1, i.e. RANDLC = 2^(-46) * x_1.  X is updated to contain
211 *  the new seed x_1, so that subsequent calls to RANDLC using the same
212 *  arguments will generate a continuous sequence.
213 *
214 *  This routine should produce the same results on any computer with at least
215 *  48 mantissa bits in double precision floating point data.  On Cray systems,
216 *  double precision should be disabled.
217 *
218 *  David H. Bailey     October 26, 1990
219 *
220 *     IMPLICIT DOUBLE PRECISION (A-H, O-Z)
221 *     SAVE KS, R23, R46, T23, T46
222 *     DATA KS/0/
223 *
224 *  If this is the first call to RANDLC, compute R23 = 2 ^ -23, R46 = 2 ^ -46,
225 *  T23 = 2 ^ 23, and T46 = 2 ^ 46.  These are computed in loops, rather than
226 *  by merely using the ** operator, in order to insure that the results are
227 *  exact on all systems.  This code assumes that 0.5D0 is represented exactly.
228 */
229
230
231/*****************************************************************/
232/*************           R  A  N  D  L  C             ************/
233/*************                                        ************/
234/*************    portable random number generator    ************/
235/*****************************************************************/
236
237double	is_randlc(X, A)
238double *X;
239double *A;
240{
241      static int        KS=0;
242      static double	R23, R46, T23, T46;
243      double		T1, T2, T3, T4;
244      double		A1;
245      double		A2;
246      double		X1;
247      double		X2;
248      double		Z;
249      int     		i, j;
250
251      if (KS == 0)
252      {
253        R23 = 1.0;
254        R46 = 1.0;
255        T23 = 1.0;
256        T46 = 1.0;
257
258        for (i=1; i<=23; i++)
259        {
260          R23 = 0.50 * R23;
261          T23 = 2.0 * T23;
262        }
263        for (i=1; i<=46; i++)
264        {
265          R46 = 0.50 * R46;
266          T46 = 2.0 * T46;
267        }
268        KS = 1;
269      }
270
271/*  Break A into two parts such that A = 2^23 * A1 + A2 and set X = N.  */
272
273      T1 = R23 * *A;
274      j  = T1;
275      A1 = j;
276      A2 = *A - T23 * A1;
277
278/*  Break X into two parts such that X = 2^23 * X1 + X2, compute
279    Z = A1 * X2 + A2 * X1  (mod 2^23), and then
280    X = 2^23 * Z + A2 * X2  (mod 2^46).                            */
281
282      T1 = R23 * *X;
283      j  = T1;
284      X1 = j;
285      X2 = *X - T23 * X1;
286      T1 = A1 * X2 + A2 * X1;
287
288      j  = R23 * T1;
289      T2 = j;
290      Z = T1 - T23 * T2;
291      T3 = T23 * Z + A2 * X2;
292      j  = R46 * T3;
293      T4 = j;
294      *X = T3 - T46 * T4;
295      return(R46 * *X);
296}
297
298
299
300
301/*****************************************************************/
302/*************      C  R  E  A  T  E  _  S  E  Q      ************/
303/*****************************************************************/
304
305void	create_seq( double seed, double a )
306{
307	double x;
308	int    i, j, k;
309
310        k = MAX_KEY/4;
311
312	for (i=0; i<NUM_KEYS; i++)
313	{
314	    x = is_randlc(&seed, &a);
315	    x += is_randlc(&seed, &a);
316    	    x += is_randlc(&seed, &a);
317	    x += is_randlc(&seed, &a);
318
319            key_array[i] = k*x;
320	}
321}
322
323
324
325
326/*****************************************************************/
327/*************    F  U  L  L  _  V  E  R  I  F  Y     ************/
328/*****************************************************************/
329
330
331void full_verify()
332{
333    INT_TYPE    i, j;
334    INT_TYPE    k;
335    INT_TYPE    m, unique_keys;
336
337
338
339/*  Now, finally, sort the keys:  */
340    for( i=0; i<NUM_KEYS; i++ )
341        key_array[--key_buff_ptr_global[key_buff2[i]]] = key_buff2[i];
342
343
344/*  Confirm keys correctly sorted: count incorrectly sorted keys, if any */
345
346    j = 0;
347    for( i=1; i<NUM_KEYS; i++ )
348        if( key_array[i-1] > key_array[i] )
349            j++;
350
351
352    if( j != 0 )
353    {
354        printf( "Full_verify: number of keys out of sort: %d\n",
355                j );
356    }
357    else
358        passed_verification++;
359
360
361}
362
363
364
365
366/*****************************************************************/
367/*************             R  A  N  K             ****************/
368/*****************************************************************/
369
370void rank( int iteration )
371{
372
373    INT_TYPE    i, j, k;
374    INT_TYPE    l, m;
375
376    INT_TYPE    shift = MAX_KEY_LOG_2 - NUM_BUCKETS_LOG_2;
377    INT_TYPE    key;
378    INT_TYPE    min_key_val, max_key_val;
379
380    INT_TYPE	prv_buff1[MAX_KEY];
381
382#pragma omp master
383  {
384    key_array[iteration] = iteration;
385    key_array[iteration+MAX_ITERATIONS] = MAX_KEY - iteration;
386
387/*  Determine where the partial verify test keys are, load into  */
388/*  top of array bucket_size                                     */
389    for( i=0; i<TEST_ARRAY_SIZE; i++ )
390        partial_verify_vals[i] = key_array[test_index_array[i]];
391
392/*  Clear the work array */
393    for( i=0; i<MAX_KEY; i++ )
394        key_buff1[i] = 0;
395  }
396#pragma omp barrier
397
398  for (i=0; i<MAX_KEY; i++)
399      prv_buff1[i] = 0;
400
401/*  Copy keys into work array; keys in key_array will be reused each iter. */
402#pragma omp for nowait
403    for( i=0; i<NUM_KEYS; i++ ) {
404        key_buff2[i] = key_array[i];
405
406/*  Ranking of all keys occurs in this section:                 */
407
408/*  In this section, the keys themselves are used as their
409    own indexes to determine how many of each there are: their
410    individual population                                       */
411
412        prv_buff1[key_buff2[i]]++;  /* Now they have individual key   */
413    }
414                                       /* population                     */
415    for( i=0; i<MAX_KEY-1; i++ )
416        prv_buff1[i+1] += prv_buff1[i];
417
418
419#pragma omp critical
420    {
421	for( i=0; i<MAX_KEY; i++ )
422	    key_buff1[i] += prv_buff1[i];
423    }
424
425/*  To obtain ranks of each key, successively add the individual key
426    population, not forgetting to add m, the total of lesser keys,
427    to the first key population                                          */
428
429#pragma omp barrier
430#pragma omp master
431  {
432
433/* This is the partial verify test section */
434/* Observe that test_rank_array vals are   */
435/* shifted differently for different cases */
436    for( i=0; i<TEST_ARRAY_SIZE; i++ )
437    {
438        k = partial_verify_vals[i];          /* test vals were put here */
439        if( 0 <= k  &&  k <= NUM_KEYS-1 )
440            switch( CLASS )
441            {
442                case 'S':
443                    if( i <= 2 )
444                    {
445                        if( key_buff1[k-1] != test_rank_array[i]+iteration )
446                        {
447                            printf( "Failed partial verification: "
448                                  "iteration %d, test key %d\n",
449                                   iteration, i );
450                        }
451                        else
452                            passed_verification++;
453                    }
454                    else
455                    {
456                        if( key_buff1[k-1] != test_rank_array[i]-iteration )
457                        {
458                            printf( "Failed partial verification: "
459                                  "iteration %d, test key %d\n",
460                                   iteration, i );
461                        }
462                        else
463                            passed_verification++;
464                    }
465                    break;
466                case 'W':
467                    if( i < 2 )
468                    {
469                        if( key_buff1[k-1] !=
470                                          test_rank_array[i]+(iteration-2) )
471                        {
472                            printf( "Failed partial verification: "
473                                  "iteration %d, test key %d\n",
474                                   iteration, i );
475                        }
476                        else
477                            passed_verification++;
478                    }
479                    else
480                    {
481                        if( key_buff1[k-1] != test_rank_array[i]-iteration )
482                        {
483                            printf( "Failed partial verification: "
484                                  "iteration %d, test key %d\n",
485                                   iteration, i );
486                        }
487                        else
488                            passed_verification++;
489                    }
490                    break;
491                case 'A':
492                    if( i <= 2 )
493        	    {
494                        if( key_buff1[k-1] !=
495                                          test_rank_array[i]+(iteration-1) )
496                        {
497                            printf( "Failed partial verification: "
498                                  "iteration %d, test key %d\n",
499                                   iteration, i );
500                        }
501                        else
502                            passed_verification++;
503        	    }
504                    else
505                    {
506                        if( key_buff1[k-1] !=
507                                          test_rank_array[i]-(iteration-1) )
508                        {
509                            printf( "Failed partial verification: "
510                                  "iteration %d, test key %d\n",
511                                   iteration, i );
512                        }
513                        else
514                            passed_verification++;
515                    }
516                    break;
517                case 'B':
518                    if( i == 1 || i == 2 || i == 4 )
519        	    {
520                        if( key_buff1[k-1] != test_rank_array[i]+iteration )
521                        {
522                            printf( "Failed partial verification: "
523                                  "iteration %d, test key %d\n",
524                                   iteration, i );
525                        }
526                        else
527                            passed_verification++;
528        	    }
529                    else
530                    {
531                        if( key_buff1[k-1] != test_rank_array[i]-iteration )
532                        {
533                            printf( "Failed partial verification: "
534                                  "iteration %d, test key %d\n",
535                                   iteration, i );
536                        }
537                        else
538                            passed_verification++;
539                    }
540                    break;
541                case 'C':
542                    if( i <= 2 )
543        	    {
544                        if( key_buff1[k-1] != test_rank_array[i]+iteration )
545                        {
546                            printf( "Failed partial verification: "
547                                  "iteration %d, test key %d\n",
548                                   iteration, i );
549                        }
550                        else
551                            passed_verification++;
552        	    }
553                    else
554                    {
555                        if( key_buff1[k-1] != test_rank_array[i]-iteration )
556                        {
557                            printf( "Failed partial verification: "
558                                  "iteration %d, test key %d\n",
559                                   iteration, i );
560                        }
561                        else
562                            passed_verification++;
563                    }
564                    break;
565            }
566    }
567
568
569
570
571/*  Make copies of rank info for use by full_verify: these variables
572    in rank are local; making them global slows down the code, probably
573    since they cannot be made register by compiler                        */
574
575    if( iteration == MAX_ITERATIONS )
576        key_buff_ptr_global = key_buff1;
577
578  } /* end master */
579}
580
581
582/*****************************************************************/
583/*************             M  A  I  N             ****************/
584/*****************************************************************/
585
586static int realmain(void *cargv)
587{
588    unsigned argv = (unsigned)((long)cargv);
589    int             i, iteration, itemp;
590    int		    nthreads = 1;
591    double          timecounter, maxtime;
592
593    omp_set_num_threads(argv);
594
595/*  Initialize the verification arrays if a valid class */
596    for( i=0; i<TEST_ARRAY_SIZE; i++ )
597        switch( CLASS )
598        {
599            case 'S':
600                test_index_array[i] = S_test_index_array[i];
601                test_rank_array[i]  = S_test_rank_array[i];
602                break;
603            case 'A':
604                test_index_array[i] = A_test_index_array[i];
605                test_rank_array[i]  = A_test_rank_array[i];
606                break;
607            case 'W':
608                test_index_array[i] = W_test_index_array[i];
609                test_rank_array[i]  = W_test_rank_array[i];
610                break;
611            case 'B':
612                test_index_array[i] = B_test_index_array[i];
613                test_rank_array[i]  = B_test_rank_array[i];
614                break;
615            case 'C':
616                test_index_array[i] = C_test_index_array[i];
617                test_rank_array[i]  = C_test_rank_array[i];
618                break;
619        };
620
621
622
623/*  Printout initial NPB info */
624    printf( "\n\n NAS Parallel Benchmarks 2.3 OpenMP C version"
625	    " - IS Benchmark\n\n" );
626    printf( " Size:  %d  (class %c)\n", TOTAL_KEYS, CLASS );
627    printf( " Iterations:   %d\n", MAX_ITERATIONS );
628
629/*  Initialize timer  */
630    timer_clear( 0 );
631
632/*  Generate random number sequence and subsequent keys on all procs */
633    create_seq( 314159265.00,                    /* Random number gen seed */
634                1220703125.00 );                 /* Random number gen mult */
635
636
637/*  Do one interation for free (i.e., untimed) to guarantee initialization of
638    all data and code pages and respective tables */
639#pragma omp parallel
640    rank( 1 );
641
642/*  Start verification counter */
643    passed_verification = 0;
644
645    if( CLASS != 'S' ) printf( "\n   iteration\n" );
646
647/*  Start timer  */
648    timer_start( 0 );
649
650
651/*  This is the main iteration */
652
653#pragma omp parallel private(iteration)
654    for( iteration=1; iteration<=MAX_ITERATIONS; iteration++ )
655    {
656
657        //#pragma omp master
658        //if( CLASS != 'S' ) printf( "        %d\n", iteration );
659
660        rank( iteration );
661
662#if defined(_OPENMP)
663#pragma omp master
664	nthreads = omp_get_num_threads();
665#endif /* _OPENMP */
666    }
667
668/*  End of timing, obtain maximum time of all processors */
669    timer_stop( 0 );
670    timecounter = timer_read( 0 );
671
672
673/*  This tests that keys are in sequence: sorting of last ranked key seq
674    occurs here, but is an untimed operation                             */
675    full_verify();
676
677
678
679/*  The final printout  */
680    if( passed_verification != 5*MAX_ITERATIONS + 1 ) {
681        passed_verification = 0;
682    }
683
684#ifdef BOMP
685//backend_create_time(argv);
686#endif
687printf("Computetime %d %f\n", argv, timecounter);
688printf("client done\n");
689/*     c_print_results( "IS", */
690/*                      CLASS, */
691/*                      TOTAL_KEYS, */
692/*                      0, */
693/*                      0, */
694/*                      MAX_ITERATIONS, */
695/* 		     nthreads, */
696/*                      timecounter, */
697/*                      ((double) (MAX_ITERATIONS*TOTAL_KEYS)) */
698/*                                                   /timecounter/1000000., */
699/*                      "keys ranked",  */
700/*                      passed_verification, */
701/*                      NPBVERSION, */
702/*                      COMPILETIME, */
703/*                      CC, */
704/*                      CLINK, */
705/*                      C_LIB, */
706/*                      C_INC, */
707/*                      CFLAGS, */
708/*                      CLINKFLAGS, */
709/* 		     "randlc"); */
710
711
712
713         /**************************/
714}        /*  E N D  P R O G R A M  */
715         /**************************/
716
717
718#define STACK_SIZE (8 * 1024 * 1024)
719
720int main(int argc, char** argv)
721{
722    if (argc != 2) { /* Print usage */
723        printf("Usage: %s <Number of threads>\n", argv[0]);
724        exit(-1);
725    }
726
727#ifdef BOMP
728    bomp_bomp_init(atoi(argv[1]));
729#endif /* BOMP */
730 realmain((void*)((long)atoi(argv[1])));
731}
732