1/* 2 * Copyright (c) 2014 ETH Zurich. 3 * All rights reserved. 4 * 5 * This file is distributed under the terms in the attached LICENSE file. 6 * If you do not find this file, copies can be found by writing to: 7 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group. 8 */ 9#include <string.h> 10#include <stdlib.h> 11#include <omp.h> 12 13#include <barrelfish/barrelfish.h> 14#include <barrelfish/nameservice_client.h> 15 16#include <xeon_phi/xeon_phi.h> 17#include <xeon_phi/xeon_phi_domain.h> 18 19#include <bench/bench.h> 20#include <xomp/xomp.h> 21 22#define BENCH_MEASURE_LOCAL 0 23#define BENCH_MEASURE_MAP_ONLY 0 24 25#define BENCH_RUN_COUNT 250 26#define BENCH_RUN_SINGLE 0 27 28#define DEBUG(x...) debug_printf(x) 29 30#define EXPECT_SUCCESS(errval, msg) \ 31 if (err_is_fail(err)) {USER_PANIC_ERR(err, msg);} 32 33static uint32_t nthreads; 34 35cycles_t timer_xompinit = 0; 36 37#if BENCH_MEASURE_LOCAL 38static bench_ctl_t *ctl_local; 39#endif 40 41#if BENCH_MEASURE_MAP_ONLY 42static void measure_mapping(struct capref frame) 43{ 44 errval_t err; 45 46 debug_printf("\n"); 47 debug_printf("==========================================\n"); 48 debug_printf("Mapping of frame\n"); 49 50 struct frame_identity id; 51 err = invoke_frame_identify(frame, &id); 52 EXPECT_SUCCESS(err, "invoke_frame_identify"); 53 54 size_t frame_size = (1UL << id.bits); 55 56 bench_ctl_t *b_ctl = NULL; 57 cycles_t tsc_start, tsc_end, elapsed; 58 59 b_ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, BENCH_RUN_COUNT); 60 61 struct capref copy; 62 err = slot_alloc(©); 63 EXPECT_SUCCESS(err, "slot alloc"); 64 65 err = cap_copy(copy, frame); 66 EXPECT_SUCCESS(err, "cap copy"); 67 68 do { 69 70 void *addr; 71 tsc_start = bench_tsc(); 72 err = vspace_map_one_frame(&addr, frame_size, copy, NULL, NULL); 73 tsc_end = bench_tsc(); 74 EXPECT_SUCCESS(err, "vspace map one frame"); 75 76 err = vspace_unmap(addr); 77 EXPECT_SUCCESS(err, "vspace unmap"); 78 elapsed = bench_time_diff(tsc_start, tsc_end); 79 80 }while (!bench_ctl_add_run(b_ctl, &elapsed)); 81 82 debug_printf("\n"); 83 debug_printf("%% %lu bytes\n", frame_size); 84 bench_ctl_dump_analysis(b_ctl, 0, "map local", bench_tsc_per_us()); 85 debug_printf("==========================================\n"); 86} 87#endif 88 89static void prepare_bomp(void) 90{ 91 cycles_t tsc_start = bench_tsc(); 92 bomp_bomp_init(nthreads); 93 cycles_t tsc_end = bench_tsc(); 94 timer_xompinit = bench_time_diff(tsc_start, tsc_end); 95} 96 97static int prepare_xomp(int argc, 98 char *argv[]) 99{ 100 errval_t err; 101 102 xomp_wloc_t location = XOMP_WORKER_LOC_MIXED; 103 for (int i = 3; i < argc; ++i) { 104 if (!strncmp(argv[i], "--location=", 11)) { 105 char *p = strchr(argv[i], '='); 106 p++; 107 if (!strcmp(p, "local")) { 108 location = XOMP_WORKER_LOC_LOCAL; 109 } 110 } 111 } 112 113 if (location == XOMP_WORKER_LOC_MIXED) { 114 debug_printf("waiting for xeon phi to be ready\n"); 115 err = xeon_phi_domain_blocking_lookup("xeon_phi.0.ready", NULL); 116 EXPECT_SUCCESS(err, "nameservice_blocking_lookup"); 117 err = xeon_phi_domain_blocking_lookup("xeon_phi.1.ready", NULL); 118 EXPECT_SUCCESS(err, "nameservice_blocking_lookup"); 119 120#if XOMP_BENCH_ENABLED 121 xomp_master_bench_enable(BENCH_RUN_COUNT, nthreads, 122 XOMP_MASTER_BENCH_MEM_ADD); 123#endif 124 125 } 126 127 struct xomp_spawn local_info = { 128 .argc = argc, 129 .argv = argv, 130#ifdef __k1om__ 131 .path = "/k1om/sbin/benchmarks/xomp_share", 132#else 133 .path = "/x86_64/sbin/benchmarks/xomp_share", 134#endif 135 }; 136 137 struct xomp_spawn remote_info = { 138 .argc = argc, 139 .argv = argv, 140 .path = "/k1om/sbin/benchmarks/xomp_share", 141 }; 142 143 struct xomp_args xomp_arg = { 144 .type = XOMP_ARG_TYPE_DISTINCT, 145 .core_stride = 1, // use default 146 .args = { 147 .distinct = { 148 .nthreads = nthreads, 149 .worker_loc = location, 150 .nphi = 2, 151 .local = local_info, 152 .remote = remote_info 153 } 154 } 155 }; 156 157 cycles_t tsc_start = bench_tsc(); 158 if (bomp_xomp_init(&xomp_arg)) { 159 debug_printf("bomp init failed!\n"); 160 exit(1); 161 } 162 cycles_t tsc_end = bench_tsc(); 163 timer_xompinit = bench_time_diff(tsc_start, tsc_end); 164 165 return (location == XOMP_WORKER_LOC_LOCAL); 166} 167 168int main(int argc, 169 char *argv[]) 170{ 171 errval_t err, repl_err = SYS_ERR_OK; 172 xomp_wid_t wid; 173 174 cycles_t t_share, t_repl; 175 176 bench_init(); 177 178 err = xomp_worker_parse_cmdline(argc, argv, &wid); 179 if (err_is_ok(err)) { 180 struct xomp_args xw_arg = { 181 .type = XOMP_ARG_TYPE_WORKER, 182 .args = { 183 .worker = { 184 .id = wid 185 } 186 } 187 }; 188 bomp_xomp_init(&xw_arg); 189 } 190 191 if (argc < 3) { 192 debug_printf("Usage: %s <numthreats>\n", argv[0]); 193 exit(1); 194 } 195 196 nthreads = strtoul(argv[1], NULL, 10); 197 if (nthreads == 0) { 198 debug_printf("num threads must be >0\n"); 199 exit(1); 200 } 201 202 DEBUG("\n"); 203 DEBUG("======================================================\n"); 204 debug_printf("Num Threads: %u\n", nthreads); 205 206 uint8_t is_shared = 0; 207 for (int i = 2; i < argc; ++i) { 208 if (!strcmp(argv[i], "bomp")) { 209 prepare_bomp(); 210 is_shared = 1; 211 } else if (!strcmp(argv[i], "xomp")) { 212 is_shared = prepare_xomp(argc, argv); 213 } else { 214 debug_printf("ignoring argument {%s}\n", argv[i]); 215 } 216 } 217 218 lvaddr_t vbase = (10UL * 1024 * 1024 * 1024); 219 220 DEBUG("\n"); 221 DEBUG("======================================================\n"); 222 DEBUG("sharing of 4k\n"); 223 224#define FRAME_SIZE_0 4096UL 225#define FRAME_SIZE_1 (1024UL * 1024) 226#define FRAME_SIZE_2 (32UL * 1024 * 1024) 227 228 size_t frame_size; 229 struct capref frame; 230 231 232#if BENCH_MEASURE_MAP_ONLY 233 measure_mapping(frame); 234#else 235 cycles_t tsc_start, tsc_end; 236 237 cycles_t tscperus = bench_tsc_per_us(); 238#if 1 239 bench_ctl_t *ctl_share_4k = NULL, *ctl_repl_4k = NULL; 240 241 ctl_share_4k = bench_ctl_init(BENCH_MODE_FIXEDRUNS,1, BENCH_RUN_COUNT); 242 ctl_repl_4k = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, BENCH_RUN_COUNT); 243 do { 244 err = frame_alloc(&frame, FRAME_SIZE_0, &frame_size); 245 EXPECT_SUCCESS(err, "frame alloc"); 246 assert(frame_size == FRAME_SIZE_0); 247 248 tsc_start = bench_tsc(); 249 err = xomp_master_add_memory(frame, vbase, XOMP_FRAME_TYPE_SHARED_RW); 250 tsc_end = bench_tsc(); 251 EXPECT_SUCCESS(err, "xomp_master_add_memory mA_shared\n"); 252 t_share = bench_time_diff(tsc_start, tsc_end); 253 254 vbase += FRAME_SIZE_0; 255 256 if (err_is_ok(err)) { 257 tsc_start = bench_tsc(); 258 repl_err = xomp_master_add_memory(frame, vbase, XOMP_FRAME_TYPE_REPL_RW); 259 tsc_end = bench_tsc(); 260 if (err_is_ok(err)) { 261 t_repl = bench_time_diff(tsc_start, tsc_end); 262 vbase += FRAME_SIZE_0; 263 bench_ctl_add_run(ctl_repl_4k, &t_repl); 264 } 265 } 266 } while (!bench_ctl_add_run(ctl_share_4k, &t_share)); 267 268 269 bench_ctl_dump_analysis(ctl_share_4k, 0, "4k shared", tscperus); 270 bench_ctl_dump_analysis(ctl_repl_4k, 0, "4k repl", tscperus); 271#endif 272#endif 273 DEBUG("\n"); 274 DEBUG("======================================================\n"); 275 DEBUG("sharing of 1M\n"); 276 277 err = frame_alloc(&frame, FRAME_SIZE_1, &frame_size); 278 EXPECT_SUCCESS(err, "frame alloc"); 279 assert(frame_size == FRAME_SIZE_1); 280 281 vbase = (vbase + FRAME_SIZE_1) & ~(FRAME_SIZE_1 - 1); 282 283#if BENCH_MEASURE_MAP_ONLY 284 measure_mapping(frame); 285#else 286#if 1 287 bench_ctl_t *ctl_share_1M, *ctl_repl_1M; 288 ctl_share_1M = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, BENCH_RUN_COUNT); 289 ctl_repl_1M = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, BENCH_RUN_COUNT); 290 do { 291 err = frame_alloc(&frame, FRAME_SIZE_1, &frame_size); 292 EXPECT_SUCCESS(err, "frame alloc"); 293 assert(frame_size == FRAME_SIZE_1); 294 295 tsc_start = bench_tsc(); 296 err = xomp_master_add_memory(frame, vbase, XOMP_FRAME_TYPE_SHARED_RW); 297 tsc_end = bench_tsc(); 298 EXPECT_SUCCESS(err, "xomp_master_add_memory mA_shared\n"); 299 t_share = bench_time_diff(tsc_start, tsc_end); 300 vbase += FRAME_SIZE_1; 301 302 if (err_is_ok(repl_err)) { 303 tsc_start = bench_tsc(); 304 repl_err = xomp_master_add_memory(frame, vbase, XOMP_FRAME_TYPE_REPL_RW); 305 tsc_end = bench_tsc(); 306 if(err_is_ok(err)) { 307 EXPECT_SUCCESS(err, "xomp_master_add_memory mA_shared\n"); 308 t_repl = bench_time_diff(tsc_start, tsc_end); 309 bench_ctl_add_run(ctl_repl_1M, &t_repl); 310 } 311 vbase += FRAME_SIZE_1; 312 } 313 } while (!bench_ctl_add_run(ctl_share_1M, &t_share)); 314 bench_ctl_dump_analysis(ctl_share_1M, 0, "1M shared", tscperus); 315 bench_ctl_dump_analysis(ctl_repl_1M, 0, "1M repl", tscperus); 316#endif 317#endif 318 DEBUG("\n"); 319 DEBUG("======================================================\n"); 320 DEBUG("sharing of 256M\n"); 321 322 err = frame_alloc(&frame, FRAME_SIZE_2, &frame_size); 323 EXPECT_SUCCESS(err, "frame alloc"); 324 assert(frame_size == FRAME_SIZE_2); 325 326#if BENCH_MEASURE_MAP_ONLY 327 measure_mapping(frame); 328#else 329 330 vbase = (vbase + FRAME_SIZE_2) & ~(FRAME_SIZE_2 - 1); 331 332 bench_ctl_t *ctl_share_256M = NULL; 333 bench_ctl_t *ctl_repl_256M = NULL; 334 uint32_t counter = 0; 335 336 ctl_share_256M = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, BENCH_RUN_COUNT); 337 ctl_repl_256M = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, BENCH_RUN_COUNT); 338 do { 339 tsc_start = bench_tsc(); 340 err = xomp_master_add_memory(frame, vbase, XOMP_FRAME_TYPE_SHARED_RW); 341 tsc_end = bench_tsc(); 342 EXPECT_SUCCESS(err, "xomp_master_add_memory mA_shared\n"); 343 t_share = bench_time_diff(tsc_start, tsc_end); 344 345 vbase += FRAME_SIZE_2; 346 347 348 if (err_is_ok(repl_err)) { 349 tsc_start = bench_tsc(); 350 repl_err = xomp_master_add_memory(frame, vbase, XOMP_FRAME_TYPE_REPL_RW); 351 if (err_is_ok(repl_err)) { 352 tsc_end = bench_tsc(); 353 t_repl = bench_time_diff(tsc_start, tsc_end); 354 bench_ctl_add_run(ctl_repl_256M, &t_repl); 355 } 356 vbase += FRAME_SIZE_2; 357 } 358 counter ++; 359 } while (!bench_ctl_add_run(ctl_share_256M, &t_share)); 360 bench_ctl_dump_analysis(ctl_share_256M, 0, "256M shared", tscperus); 361 bench_ctl_dump_analysis(ctl_repl_256M, 0, "256M repl", tscperus); 362#endif 363 debug_printf("-------------------------------------\n"); 364 365#if XOMP_BENCH_ENABLED 366 xomp_master_bench_print_results(); 367#endif 368 369 while (1) 370 ; 371 372} 373 374