1/* 2 * Copyright (c) 2007-2012, ETH Zurich. 3 * All rights reserved. 4 * 5 * This file is distributed under the terms in the attached LICENSE file. 6 * If you do not find this file, copies can be found by writing to: 7 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group. 8 */ 9 10#include "elb.h" 11 12#include <barrelfish/sys_debug.h> 13#include <bench/bench.h> 14#include <trace/trace.h> 15 16static void client_send_packet(void); 17static void start_next_iteration(void); 18static void respond_buffer(size_t i, size_t len); 19 20 21struct ethernet_frame { 22 uint8_t dst_mac[6]; 23 uint8_t src_mac[6]; 24 uint16_t ethertype; 25 uint8_t payload[]; 26} __attribute__((packed)); 27 28 29static uint64_t tscperms; 30 31static size_t buf_cur = 0; 32 33// Number of TX buffers available 34static size_t buf_count; 35 36bool is_server = false; 37static char *app_type = "client"; 38 39static uint64_t sent_at; 40static uint64_t started_at; 41static uint64_t minbase = -1ULL; 42static uint64_t maxbase = -1ULL; 43static bool affinity_set = false; 44 45#define MAX_PAYLOAD 1500 46/** Size of payload for ethernet packets in benchmark */ 47static size_t payload_size = 64; 48 49/** Specifies whether the data should be read by the client */ 50static bool read_incoming = false; 51 52/** Specifies whether a permutation should be used or just a linear scan */ 53static bool read_linear = true; 54 55/** Will be initialized with a permutation for touching the packet content */ 56static uint16_t read_permutation[MAX_PAYLOAD]; 57 58// the cardname provided on commandline 59static char *cardname = "e10k"; 60 61// the queueid asked by the application 62static uint64_t qi = 0; 63 64/** Number of runs to run */ 65static size_t total_runs = 10000; 66 67/** Number of dry runs before we start benchmarking */ 68static size_t dry_runs = 100; 69 70/** Specifies whether the time for each run should be dumped */ 71static bool dump_each_run = false; 72 73/** Specifies if NOCACHE should be used for mapping the buffers */ 74static bool use_nocache = false; 75 76/** Prefix for outputting the results */ 77static const char *out_prefix = ""; 78 79 80/** Benchmark control handle */ 81bench_ctl_t *bench_ctl = NULL; 82 83 84#if TRACE_ONLY_LLNET 85char trbuf[16*1024*1024]; 86#endif // TRACE_ONLY_LLNET 87 88 89 90/** Generate a permutation for touching the packet contents */ 91static void create_read_permutation(void) 92{ 93 uint16_t i; 94 uint16_t j; 95 uint16_t tmp; 96 97 for (i = 0; i < payload_size; i++) { 98 read_permutation[i] = i; 99 } 100 101 srand(rdtsc()); 102 103 // Use fisher-yates shuffle 104 for (i = payload_size - 1; i >= 1; i--) { 105 j = rand() % (i + 1); 106 107 tmp = read_permutation[i]; 108 read_permutation[i] = read_permutation[j]; 109 read_permutation[j] = tmp; 110 } 111} 112 113 114 115 116void benchmark_init(void) 117{ 118 errval_t err; 119 int i; 120 121 net_if_init(cardname, qi); 122 123 buf_count = buffer_count / 2; 124 125 assert(buf_count >= 8); 126 127 err = sys_debug_get_tsc_per_ms(&tscperms); 128 assert(err_is_ok(err)); 129 130 131 // If desired, create permutation for accessing incoming data 132 if (read_incoming && !read_linear) { 133 create_read_permutation(); 134 } 135 136 // Initialize benchmark control 137 bench_ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, total_runs); 138 bench_ctl_dry_runs(bench_ctl, dry_runs); 139 140 // Register a bunch of buffers to avoid race conditions 141 for (i = 0; i < 8; i++) { 142 errval_t err3 = buffer_rx_add(buf_count + i); 143 assert(err3 != SYS_ERR_OK); 144 if (err3 != SYS_ERR_OK) { 145 printf("elb: failed to register buffers...\n"); 146 abort(); 147 } 148 } 149 150 if (is_server) { 151 printf("elb: Starting benchmark server...\n"); 152 } else { 153 printf("elb: Starting benchmark client...\n"); 154 155 156#if TRACE_ONLY_LLNET 157 assert(err_is_ok(err)); 158 err = trace_control(TRACE_EVENT(TRACE_SUBSYS_LLNET, 159 TRACE_EVENT_LLNET_START, 0), 160 TRACE_EVENT(TRACE_SUBSYS_LLNET, 161 TRACE_EVENT_LLNET_STOP, 0), 162 0); 163 assert(err_is_ok(err)); 164 trace_event(TRACE_SUBSYS_LLNET, TRACE_EVENT_LLNET_START, 0); 165#endif // TRACE_ONLY_LLNET 166 167 168 169 started_at = rdtsc(); 170 start_next_iteration(); 171 } 172 173} 174 175void benchmark_argument(char *arg) 176{ 177 if (!strcmp(arg, "elb_server=1")) { 178 is_server = true; 179 app_type = "server"; 180 } else if (!strncmp(arg, "runs=", strlen("runs="))) { 181 total_runs = atol(arg + strlen("runs=")); 182 } else if (!strncmp(arg, "dry_runs=", strlen("dry_runs="))) { 183 dry_runs = atol(arg + strlen("dry_runs=")); 184 } else if (!strncmp(arg, "payload_size=", strlen("payload_size="))) { 185 payload_size = atol(arg + strlen("payload_size=")); 186 if (payload_size < 46) { 187 printf("elb: Payload size too small (must be at least 46), has " 188 "been extended to 46!\n"); 189 payload_size = 46; 190 } else if (payload_size > MAX_PAYLOAD) { 191 printf("elb: Payload size too big (must be at most 1500), has " 192 "been limited to 1500!\n"); 193 payload_size = 1500; 194 } 195 } else if (!strncmp(arg, "elp_outprefix=", strlen("elp_outprefix="))) { 196 out_prefix = arg + strlen("elp_outprefix="); 197 } else if (!strncmp(arg, "elb_nocache=", strlen("elb_nocache="))) { 198 use_nocache = !!atol(arg + strlen("elb_nocache=")); 199 } else if (!strncmp(arg, "read_incoming=", strlen("read_incoming="))) { 200 read_incoming = !!atol(arg + strlen("read_incoming=")); 201 } else if (!strncmp(arg, "dump_each=", strlen("dump_each="))) { 202 dump_each_run = !!atol(arg + strlen("dump_each=")); 203 } else if (!strncmp(arg, "affinitymin=", strlen("affinitymin="))) { 204 minbase = atol(arg + strlen("affinitymin=")); 205 } else if(!strncmp(arg, "affinitymax=", strlen("affinitymax="))) { 206 maxbase = atol(arg + strlen("affinitymax=")); 207 } else if(!strncmp(arg, "cardname=", strlen("cardname="))) { 208 cardname = arg + strlen("cardname="); 209 } else if(!strncmp(arg, "queue=", strlen("queue="))) { 210 qi = atol(arg + strlen("queue=")); 211 } else { 212 printf("Invalid command line argument [%s]\n", arg); 213 abort(); 214 } 215 216 if (!affinity_set && minbase != -1ULL && maxbase != -1ULL) { 217 ram_set_affinity(minbase, maxbase); 218 affinity_set = true; 219 } 220} 221 222// Returns the card-name provided by command line parameters 223char *get_cardname(void) 224{ 225 return cardname; 226} 227 228// Returns the queue-id provided by command line parameters 229uint64_t get_cmdline_queueid(void) 230{ 231 return qi; 232} 233 234void benchmark_do_pending_work(void) 235{ 236 return; 237} 238 239void benchmark_rx_done(size_t idx, size_t pkt_len, uint64_t more, 240 uint64_t flags) 241{ 242 static bool first = true; 243 if (is_server) { 244 respond_buffer(idx, pkt_len); 245 } else { 246 // Touch data if desired 247 if (read_incoming) { 248 struct ethernet_frame* frame = buffer_address(idx); 249 volatile uint8_t* b = frame->payload; 250 size_t i; 251 size_t acc = 0; // FIXME: compiler might optimize out this code 252 if (read_linear) { 253 for (i = 0; i< payload_size; i++) { 254 acc += b[i]; 255 } 256 } else { 257 for (i = 0; i < payload_size; i++) { 258 acc += b[read_permutation[i]]; 259 } 260 } 261 } 262 263 cycles_t tsc = rdtsc(); 264 cycles_t result[1] = { 265 tsc - sent_at, 266 }; 267 268#if TRACE_ONLY_LLNET 269 trace_event(TRACE_SUBSYS_LLNET, TRACE_EVENT_LLNET_APPRX, 0); 270#endif // TRACE_ONLY_LLNET 271 272 if (first) { 273 printf("elb: First response received\n"); 274 first = false; 275 } 276 277 // Reregister rx buffer 278 errval_t err = buffer_rx_add(idx); 279 if (err != SYS_ERR_OK) { 280 printf("Could not add buffer in RX ring\n"); 281 abort(); 282 } 283 284 if (bench_ctl_add_run(bench_ctl, result)) { 285 uint64_t tscperus = tscperms / 1000; 286 printf("cycles per us %"PRIu64"\n", tscperus); 287 288 // Output our results 289 bench_ctl_dump_csv_bincounting(bench_ctl, 0, 100, 9 * tscperus, 290 25 * tscperus, out_prefix, tscperus); 291 292 bench_ctl_dump_analysis(bench_ctl, 0, out_prefix, tscperus); 293 //bench_ctl_dump_csv(bench_ctl, out_prefix, tscperus); 294 295#if TRACE_ONLY_LLNET 296 trace_event(TRACE_SUBSYS_LLNET, TRACE_EVENT_LLNET_STOP, 0); 297 size_t trsz = trace_dump(trbuf, sizeof(trbuf) - 1, NULL); 298 trbuf[trsz] = 0; 299 printf("\n\n\n\nTrace results:\n%s\n\n\n", trbuf); 300#endif // TRACE_ONLY_LLNET 301 302 bench_ctl_destroy(bench_ctl); 303 terminate_benchmark(); 304 } else { 305 start_next_iteration(); 306 } 307 } 308 309} // end function: benchmark_rx_done 310 311void benchmark_tx_done(size_t idx) 312{ 313 if (is_server) { 314 // Reregister rx buffer 315 errval_t err = buffer_rx_add(idx); 316 if (err != SYS_ERR_OK) { 317 printf("Could not add buffer in RX ring\n"); 318 abort(); 319 } 320 321 } 322} 323 324static void start_next_iteration(void) 325{ 326#if TRACE_ONLY_LLNET 327 trace_event(TRACE_SUBSYS_LLNET, TRACE_EVENT_LLNET_APPTX, 0); 328#endif // TRACE_ONLY_LLNET 329 330 client_send_packet(); 331} 332 333static void client_send_packet(void) 334{ 335 struct ethernet_frame *frame; 336 const char bcast[6] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; 337 size_t len = sizeof(*frame) + payload_size; 338 size_t idx = (buf_cur + 1) % buf_count; 339 340 frame = buffer_address(idx); 341 //memcpy(frame->src_mac, our_mac, 6); 342 memcpy(frame->src_mac, bcast, 6); 343 memcpy(frame->dst_mac, bcast, 6); 344 frame->ethertype = 0x0608; 345 sent_at = rdtsc(); 346 errval_t err = buffer_tx_add(idx, 0, len, 0, 0); 347 if (err != SYS_ERR_OK) { 348 printf("Could not add buffer for TX\n"); 349 assert(err != SYS_ERR_OK); 350 abort(); 351 } 352} 353 354static void respond_buffer(size_t i, size_t len) 355{ 356 /*struct ethernet_frame *frame = buf_virt[i]; 357 const char bcast[6] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; 358 359 memcpy(frame->src_mac, our_mac, 6); 360 memcpy(frame->dst_mac, bcast, 6);*/ 361 362 errval_t err = buffer_tx_add(i, 0, len, 0, 0); 363 if (err != SYS_ERR_OK) { 364 printf("Could not add buffer for TX\n"); 365 assert(err != SYS_ERR_OK); 366 abort(); 367 } 368} 369 370