1/*
2 * Copyright (c) 2014 ETH Zurich.
3 * All rights reserved.
4 *
5 * This file is distributed under the terms in the attached LICENSE file.
6 * If you do not find this file, copies can be found by writing to:
7 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
8 */
9#include <string.h>
10#include <stdio.h>
11#include <stdlib.h>
12#include <omp.h>
13
14#include <barrelfish/barrelfish.h>
15#include <barrelfish/nameservice_client.h>
16#include <barrelfish/dispatch.h>
17
18#include <xeon_phi/xeon_phi.h>
19#include <xeon_phi/xeon_phi_domain.h>
20
21#include <bench/bench.h>
22#include <xomp/xomp.h>
23
24#define BENCH_MEASURE_LOCAL 0
25
26#define BENCH_RUN_COUNT 5000
27
28#define BENCH_STEP_SIZE 10
29
30#define DEBUG(x...) debug_printf(x)
31
32#define EXPECT_SUCCESS(errval, msg) \
33    if (err_is_fail(err)) {USER_PANIC_ERR(err, msg);}
34
35static uint32_t nthreads;
36
37static cycles_t timer_xompinit;
38
39static volatile int counter = 0;
40
41static void work_omp(void)
42{
43#pragma omp parallel
44    {
45        int num_threads = omp_get_num_threads() * 2;
46#pragma omp  for nowait schedule (static, 2)
47        for (int i = 0; i < num_threads; i++) {
48            counter++;
49            //debug_printf("thread: %u\n", omp_get_thread_num());
50        }
51    }
52}
53
54#ifndef __k1om__
55static void prepare_bomp(void)
56{
57    cycles_t tsc_start = bench_tsc();
58    bomp_bomp_init(nthreads);
59    cycles_t tsc_end = bench_tsc();
60    timer_xompinit = bench_time_diff(tsc_start, tsc_end);
61}
62#endif
63
64static int prepare_xomp(int argc,
65                        char *argv[])
66{
67    errval_t err;
68
69    xomp_wloc_t location = XOMP_WORKER_LOC_MIXED;
70    for (int i = 3; i < argc; ++i) {
71        if (!strncmp(argv[i], "--location=", 11)) {
72            char *p = strchr(argv[i], '=');
73            p++;
74            if (!strcmp(p, "local")) {
75                location = XOMP_WORKER_LOC_LOCAL;
76            }
77        }
78    }
79
80    if (location == XOMP_WORKER_LOC_MIXED) {
81#if XOMP_BENCH_ENABLED
82        xomp_master_bench_enable(BENCH_RUN_COUNT, nthreads,
83                        XOMP_MASTER_BENCH_DO_WORK);
84#endif
85        debug_printf("waiting for xeon phi to be ready\n");
86        err = xeon_phi_domain_blocking_lookup("xeon_phi.0.ready", NULL);
87        EXPECT_SUCCESS(err, "nameservice_blocking_lookup");
88        err = xeon_phi_domain_blocking_lookup("xeon_phi.1.ready", NULL);
89        EXPECT_SUCCESS(err, "nameservice_blocking_lookup");
90    }
91
92    struct xomp_spawn local_info = {
93        .argc = argc,
94        .argv = argv,
95#ifdef __k1om__
96        .path = "/k1om/sbin/benchmarks/xomp_work",
97#else
98		.path = "/x86_64/sbin/benchmarks/xomp_work",
99#endif
100    };
101
102    struct xomp_spawn remote_info = {
103        .argc = argc,
104        .argv = argv,
105        .path = "/k1om/sbin/benchmarks/xomp_work",
106    };
107
108    struct xomp_args xomp_arg = {
109        .type = XOMP_ARG_TYPE_DISTINCT,
110        .core_stride = (location == XOMP_WORKER_LOC_LOCAL) ? 1 : 2,
111        .args = {
112            .distinct = {
113                .nthreads = nthreads,
114                .worker_loc = location,
115                .nphi = 2,
116                .local = local_info,
117                .remote = remote_info
118            }
119        }
120    };
121
122    cycles_t tsc_start = bench_tsc();
123    if (bomp_xomp_init(&xomp_arg)) {
124        debug_printf("bomp init failed!\n");
125        exit(1);
126    }
127    cycles_t tsc_end = bench_tsc();
128    timer_xompinit = bench_time_diff(tsc_start, tsc_end);
129
130    return (location == XOMP_WORKER_LOC_LOCAL);
131}
132
133int main(int argc,
134         char *argv[])
135{
136    errval_t err;
137    xomp_wid_t wid;
138
139    cycles_t tsc_start, tsc_end;
140
141    bench_init();
142
143    err = xomp_worker_parse_cmdline(argc, argv, &wid);
144    if (err_is_ok(err)) {
145        struct xomp_args xw_arg = {
146            .type = XOMP_ARG_TYPE_WORKER,
147            .args = {
148                .worker = {
149                    .id = wid
150                }
151            }
152        };
153        bomp_xomp_init(&xw_arg);
154    }
155
156    if (argc < 3) {
157        debug_printf("Usage: %s <numthreads>\n", argv[0]);
158        exit(1);
159    }
160
161    nthreads = strtoul(argv[1], NULL, 10);
162    if (nthreads == 0) {
163        debug_printf("num threads must be >0\n");
164        exit(1);
165    }
166
167    DEBUG("\n");
168    DEBUG("======================================================\n");
169    debug_printf("Num Threads: %u\n", nthreads);
170
171    uint8_t is_bomp = 0;
172#ifdef __k1om__
173    if (disp_xeon_phi_id()) {
174        prepare_xomp(argc, argv);
175    } else {
176        prepare_xomp(argc, argv);
177    }
178
179#else
180    for (int i = 2; i < argc; ++i) {
181        if (!strcmp(argv[i], "bomp")) {
182            prepare_bomp();
183            is_bomp = 1;
184        } else if (!strcmp(argv[i], "xomp")) {
185            prepare_xomp(argc, argv);
186        } else {
187            debug_printf("ignoring argument {%s}\n", argv[i]);
188        }
189    }
190#endif
191
192    DEBUG("\n");
193    DEBUG("======================================================\n");
194    DEBUG("work_omp\n");
195
196    cycles_t tscperus = bench_tsc_per_us();
197
198    bench_ctl_t *ctl_omp;
199
200    cycles_t timer_omp = 0;
201    char buf[20];
202
203#ifdef __k1om__
204    uint8_t do_work = !disp_xeon_phi_id();
205#endif
206
207    for (uint32_t i = 1; i <= nthreads; ++i) {
208        if (i % BENCH_STEP_SIZE) {
209            if (i != nthreads && i != 2) {
210                continue;
211            }
212#ifdef __k1om__
213            if (!do_work) {
214                do_work = !do_work;
215                continue;
216            }
217#endif
218        }
219
220#ifdef __k1om__
221    do_work = !do_work;
222#endif
223        omp_set_num_threads(i);
224
225        ctl_omp = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, BENCH_RUN_COUNT);
226        do {
227            tsc_start = bench_tsc();
228            work_omp();
229            tsc_end = bench_tsc();
230            timer_omp = bench_time_diff(tsc_start, tsc_end);
231            if (is_bomp) {
232#ifdef __k1om__
233                for (uint32_t j = 0; j < 500 * i; ++j) {
234                    thread_yield();
235                }
236#else
237
238                for (uint32_t j = 0; j < 1000 * i; ++j) {
239                    thread_yield();
240                }
241
242#endif
243            }
244        } while (!bench_ctl_add_run(ctl_omp, &timer_omp));
245
246        snprintf(buf, 20, "threads=%u", i);
247
248        bench_ctl_dump_analysis(ctl_omp, 0, buf, tscperus);
249
250        bench_ctl_destroy(ctl_omp);
251    }
252
253    debug_printf("-------------------------------------\n");
254
255#if XOMP_BENCH_ENABLED
256    xomp_master_bench_print_results();
257#endif
258    debug_printf("-------------------------------------\n");
259
260    while (1)
261        ;
262
263}
264
265