1/*
2 * Copyright (c) 2009, 2010, 2011, ETH Zurich.
3 * All rights reserved.
4 *
5 * This file is distributed under the terms in the attached LICENSE file.
6 * If you do not find this file, copies can be found by writing to:
7 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
8 */
9
10#include <stdlib.h>
11#include <stdio.h>
12#include <assert.h>
13#include <stdint.h>
14#include <omp.h>
15#include <arch/x86/barrelfish_kpi/asm_inlines_arch.h>
16
/*
 * Build-time switches; main() tests each of them with #ifdef.
 *
 * GANG_SCHEDULING: when defined, main() executes one empty OpenMP parallel
 * region right before the timed loop (the bomp_synchronize() call inside that
 * region is commented out in main()).
 */
17#define GANG_SCHEDULING
/*
 * MEASURE_SYNC: explicitly left undefined here, which compiles out the
 * "before sync" / "after sync" per-thread timestamp comparison in main().
 */
18#undef MEASURE_SYNC
/*
 * MEASURE: when defined, main() records rdtsc() timestamps around the work
 * step and the barrier of every iteration and prints "avg compute time" and
 * "avg wait time" per thread.
 */
19#define MEASURE
20
/*
 * Calibration target: main() keeps doubling the iteration count until one
 * calibration pass spans at least this many rdtsc() units.
 */
21#define WORK_PERIOD	5000000000UL
/* NOTE(review): not referenced by any code visible in this file. */
22#define STACK_SIZE      (64 * 1024)
23
24int main(int argc, char *argv[])
25{
26  uint64_t now, start;
27  volatile uint64_t workcnt, workload = 0;
28  int64_t workmax = 1000;
29  int64_t i;
30
31  if(argc == 1) {
32    printf("calibrating...\n");
33
34    do {
35      workload = 0;
36      workmax *= 2;
37
38      start = rdtsc();
39
40#pragma omp parallel private(i,workload)
41      for(i = 0; i < workmax; i++) {
42#pragma omp barrier
43          workload++;
44      }
45
46      now = rdtsc();
47    } while(now - start < WORK_PERIOD);
48
49    printf("workmax = %ld\n", workmax);
50    return 0;
51  } else {
52    workmax = atol(argv[1]);
53  }
54
55  int nthreads = omp_get_max_threads();
56
57  if(argc == 3) {
58      nthreads = atoi(argv[2]);
59      assert(!"REVISE!!!");
60      bomp_bomp_init(nthreads);
61      omp_set_num_threads(nthreads);
62  }
63
64  printf("threads %d, workmax %ld, CPUs %d\n", nthreads, workmax,
65	 omp_get_num_procs());
66
67#ifdef MEASURE_SYNC
68  uint64_t waits[16] = {
69      0, 1000, 1000000, 1000000000, 500, 5000000, 5000000000, 3000000,
70      0, 1000, 1000000, 1000000000, 500, 5000000, 5000000000, 3000000
71  };
72  uint64_t ts[16][10];
73
74  printf("before sync:\n");
75
76#pragma omp parallel private(workcnt)
77  {
78      for(int j = 0; j < waits[omp_get_thread_num()]; j++) {
79          workcnt++;
80      }
81
82      for(int j = 0; j < 10; j++) {
83          ts[omp_get_thread_num()][j] = rdtsc();
84      }
85  }
86
87  for(int j = 0; j < 10; j++) {
88      printf("timestamp %d: ", j);
89
90      for(int n = 1; n < nthreads; n++) {
91          printf("%ld ", ts[n][j] - ts[n - 1][j]);
92      }
93
94      printf("\n");
95  }
96
97  printf("after sync:\n");
98
99#pragma omp parallel
100  {
101      bomp_synchronize();
102
103      for(int j = 0; j < 10; j++) {
104          ts[omp_get_thread_num()][j] = rdtsc();
105      }
106  }
107
108  for(int j = 0; j < 10; j++) {
109      printf("timestamp %d: ", j);
110
111      for(int n = 1; n < nthreads; n++) {
112          printf("%ld ", ts[n][j] - ts[n - 1][j]);
113      }
114
115      printf("\n");
116  }
117#endif
118
119#ifdef GANG_SCHEDULING
120#pragma omp parallel
121  {
122   //   bomp_synchronize();
123  }
124#endif
125
126  start = rdtsc();
127
128#ifdef MEASURE
129#       define MAXTHREADS      16
130#       define WORKMAX         10000
131
132  static uint64_t starta[MAXTHREADS][WORKMAX];
133  static uint64_t end1[MAXTHREADS][WORKMAX];
134  static uint64_t end2[MAXTHREADS][WORKMAX];
135#endif
136
137  // Do some work
138#pragma omp parallel private(workcnt,i)
139  for(i = 0; i < workmax; i++) {
140#ifdef MEASURE
141      starta[omp_get_thread_num()][i < WORKMAX ? i : WORKMAX] = rdtsc();
142#endif
143      workcnt++;
144#ifdef MEASURE
145      end1[omp_get_thread_num()][i < WORKMAX ? i : WORKMAX] = rdtsc();
146#endif
147
148#pragma omp barrier
149
150#ifdef MEASURE
151      end2[omp_get_thread_num()][i < WORKMAX ? i : WORKMAX] = rdtsc();
152#endif
153  }
154
155  now = rdtsc();
156
157#ifdef MEASURE
158  printf("avg compute time: ");
159  for(int n = 0; n < nthreads; n++) {
160      uint64_t sum = 0, min = end1[0][0], max = 0;
161
162      for(i = 0; i < WORKMAX; i++) {
163          uint64_t val = end1[n][i] - starta[n][i];
164          sum += val;
165          min = val < min ? val : min;
166          max = val > max ? val : max;
167      }
168
169      printf("%lu(%lu,%lu) ", sum / WORKMAX, min, max);
170  }
171  printf("\n");
172
173#if 0
174  printf("wait time dump:\n");
175  for(i = 0; i < WORKMAX; i++) {
176      for(int n = 0; n < nthreads; n++) {
177          uint64_t val = end2[n][i] - end1[n][i];
178          printf("%lu ", val);
179      }
180      printf("\n");
181  }
182#endif
183
184  printf("avg wait time: ");
185  for(int n = 0; n < nthreads; n++) {
186      uint64_t sum = 0, min = end2[0][0], max = 0;
187
188      for(i = 0; i < WORKMAX; i++) {
189          uint64_t val = end2[n][i] - end1[n][i];
190          sum += val;
191          min = val < min ? val : min;
192          max = val > max ? val : max;
193      }
194
195      printf("%lu(%lu,%lu) ", sum / WORKMAX, min, max);
196  }
197  printf("\n");
198#endif
199
200  printf("%s: threads %d, compute time %lu ticks\n", argv[0], nthreads, now - start);
201
202  for(;;);
203  return 0;
204}
205