1/* 2 * Copyright (c) 2014 ETH Zurich. 3 * All rights reserved. 4 * 5 * This file is distributed under the terms in the attached LICENSE file. 6 * If you do not find this file, copies can be found by writing to: 7 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group. 8 */ 9#include <string.h> 10#include <stdio.h> 11#include <stdlib.h> 12#include <omp.h> 13 14#include <barrelfish/barrelfish.h> 15#include <barrelfish/nameservice_client.h> 16#include <barrelfish/dispatch.h> 17 18#include <xeon_phi/xeon_phi.h> 19#include <xeon_phi/xeon_phi_domain.h> 20 21#include <bench/bench.h> 22#include <xomp/xomp.h> 23 24#define BENCH_MEASURE_LOCAL 0 25 26#define BENCH_RUN_COUNT 5000 27 28#define BENCH_STEP_SIZE 10 29 30#define DEBUG(x...) debug_printf(x) 31 32#define EXPECT_SUCCESS(errval, msg) \ 33 if (err_is_fail(err)) {USER_PANIC_ERR(err, msg);} 34 35static uint32_t nthreads; 36 37static cycles_t timer_xompinit; 38 39static volatile int counter = 0; 40 41static void work_omp(void) 42{ 43#pragma omp parallel 44 { 45 int num_threads = omp_get_num_threads() * 2; 46#pragma omp for nowait schedule (static, 2) 47 for (int i = 0; i < num_threads; i++) { 48 counter++; 49 //debug_printf("thread: %u\n", omp_get_thread_num()); 50 } 51 } 52} 53 54#ifndef __k1om__ 55static void prepare_bomp(void) 56{ 57 cycles_t tsc_start = bench_tsc(); 58 bomp_bomp_init(nthreads); 59 cycles_t tsc_end = bench_tsc(); 60 timer_xompinit = bench_time_diff(tsc_start, tsc_end); 61} 62#endif 63 64static int prepare_xomp(int argc, 65 char *argv[]) 66{ 67 errval_t err; 68 69 xomp_wloc_t location = XOMP_WORKER_LOC_MIXED; 70 for (int i = 3; i < argc; ++i) { 71 if (!strncmp(argv[i], "--location=", 11)) { 72 char *p = strchr(argv[i], '='); 73 p++; 74 if (!strcmp(p, "local")) { 75 location = XOMP_WORKER_LOC_LOCAL; 76 } 77 } 78 } 79 80 if (location == XOMP_WORKER_LOC_MIXED) { 81#if XOMP_BENCH_ENABLED 82 xomp_master_bench_enable(BENCH_RUN_COUNT, nthreads, 83 XOMP_MASTER_BENCH_DO_WORK); 84#endif 85 debug_printf("waiting for xeon phi to be ready\n"); 86 err = xeon_phi_domain_blocking_lookup("xeon_phi.0.ready", NULL); 87 EXPECT_SUCCESS(err, "nameservice_blocking_lookup"); 88 err = xeon_phi_domain_blocking_lookup("xeon_phi.1.ready", NULL); 89 EXPECT_SUCCESS(err, "nameservice_blocking_lookup"); 90 } 91 92 struct xomp_spawn local_info = { 93 .argc = argc, 94 .argv = argv, 95#ifdef __k1om__ 96 .path = "/k1om/sbin/benchmarks/xomp_work", 97#else 98 .path = "/x86_64/sbin/benchmarks/xomp_work", 99#endif 100 }; 101 102 struct xomp_spawn remote_info = { 103 .argc = argc, 104 .argv = argv, 105 .path = "/k1om/sbin/benchmarks/xomp_work", 106 }; 107 108 struct xomp_args xomp_arg = { 109 .type = XOMP_ARG_TYPE_DISTINCT, 110 .core_stride = (location == XOMP_WORKER_LOC_LOCAL) ? 1 : 2, 111 .args = { 112 .distinct = { 113 .nthreads = nthreads, 114 .worker_loc = location, 115 .nphi = 2, 116 .local = local_info, 117 .remote = remote_info 118 } 119 } 120 }; 121 122 cycles_t tsc_start = bench_tsc(); 123 if (bomp_xomp_init(&xomp_arg)) { 124 debug_printf("bomp init failed!\n"); 125 exit(1); 126 } 127 cycles_t tsc_end = bench_tsc(); 128 timer_xompinit = bench_time_diff(tsc_start, tsc_end); 129 130 return (location == XOMP_WORKER_LOC_LOCAL); 131} 132 133int main(int argc, 134 char *argv[]) 135{ 136 errval_t err; 137 xomp_wid_t wid; 138 139 cycles_t tsc_start, tsc_end; 140 141 bench_init(); 142 143 err = xomp_worker_parse_cmdline(argc, argv, &wid); 144 if (err_is_ok(err)) { 145 struct xomp_args xw_arg = { 146 .type = XOMP_ARG_TYPE_WORKER, 147 .args = { 148 .worker = { 149 .id = wid 150 } 151 } 152 }; 153 bomp_xomp_init(&xw_arg); 154 } 155 156 if (argc < 3) { 157 debug_printf("Usage: %s <numthreads>\n", argv[0]); 158 exit(1); 159 } 160 161 nthreads = strtoul(argv[1], NULL, 10); 162 if (nthreads == 0) { 163 debug_printf("num threads must be >0\n"); 164 exit(1); 165 } 166 167 DEBUG("\n"); 168 DEBUG("======================================================\n"); 169 debug_printf("Num Threads: %u\n", nthreads); 170 171 uint8_t is_bomp = 0; 172#ifdef __k1om__ 173 if (disp_xeon_phi_id()) { 174 prepare_xomp(argc, argv); 175 } else { 176 prepare_xomp(argc, argv); 177 } 178 179#else 180 for (int i = 2; i < argc; ++i) { 181 if (!strcmp(argv[i], "bomp")) { 182 prepare_bomp(); 183 is_bomp = 1; 184 } else if (!strcmp(argv[i], "xomp")) { 185 prepare_xomp(argc, argv); 186 } else { 187 debug_printf("ignoring argument {%s}\n", argv[i]); 188 } 189 } 190#endif 191 192 DEBUG("\n"); 193 DEBUG("======================================================\n"); 194 DEBUG("work_omp\n"); 195 196 cycles_t tscperus = bench_tsc_per_us(); 197 198 bench_ctl_t *ctl_omp; 199 200 cycles_t timer_omp = 0; 201 char buf[20]; 202 203#ifdef __k1om__ 204 uint8_t do_work = !disp_xeon_phi_id(); 205#endif 206 207 for (uint32_t i = 1; i <= nthreads; ++i) { 208 if (i % BENCH_STEP_SIZE) { 209 if (i != nthreads && i != 2) { 210 continue; 211 } 212#ifdef __k1om__ 213 if (!do_work) { 214 do_work = !do_work; 215 continue; 216 } 217#endif 218 } 219 220#ifdef __k1om__ 221 do_work = !do_work; 222#endif 223 omp_set_num_threads(i); 224 225 ctl_omp = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, BENCH_RUN_COUNT); 226 do { 227 tsc_start = bench_tsc(); 228 work_omp(); 229 tsc_end = bench_tsc(); 230 timer_omp = bench_time_diff(tsc_start, tsc_end); 231 if (is_bomp) { 232#ifdef __k1om__ 233 for (uint32_t j = 0; j < 500 * i; ++j) { 234 thread_yield(); 235 } 236#else 237 238 for (uint32_t j = 0; j < 1000 * i; ++j) { 239 thread_yield(); 240 } 241 242#endif 243 } 244 } while (!bench_ctl_add_run(ctl_omp, &timer_omp)); 245 246 snprintf(buf, 20, "threads=%u", i); 247 248 bench_ctl_dump_analysis(ctl_omp, 0, buf, tscperus); 249 250 bench_ctl_destroy(ctl_omp); 251 } 252 253 debug_printf("-------------------------------------\n"); 254 255#if XOMP_BENCH_ENABLED 256 xomp_master_bench_print_results(); 257#endif 258 debug_printf("-------------------------------------\n"); 259 260 while (1) 261 ; 262 263} 264 265