1/*
2 * Copyright (c) 2007-2012, ETH Zurich.
3 * All rights reserved.
4 *
5 * This file is distributed under the terms in the attached LICENSE file.
6 * If you do not find this file, copies can be found by writing to:
7 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
8 */
9
10#include "elb.h"
11
12#include <barrelfish/sys_debug.h>
13#include <bench/bench.h>
14#include <trace/trace.h>
15
// Client side: fill in a frame header and queue the frame for transmission
static void client_send_packet(void);
// Client side: start the next round trip (emits a trace event if enabled)
static void start_next_iteration(void);
// Server side: queue buffer i (len bytes) for transmission
static void respond_buffer(size_t i, size_t len);
19
20
/**
 * Layout of the frames exchanged by the benchmark: two 6-byte MAC addresses,
 * a 16-bit ethertype, then the payload. Packed so that no padding is inserted
 * between the fields.
 */
struct ethernet_frame {
    uint8_t dst_mac[6];
    uint8_t src_mac[6];
    uint16_t ethertype;
    uint8_t payload[];  // flexible array member; its length is tracked separately
} __attribute__((packed));
27
28
/** TSC ticks per millisecond, filled in by benchmark_init() */
static uint64_t tscperms;

/** Base used by client_send_packet() to pick its TX buffer index; no code
 *  visible in this file modifies it */
static size_t   buf_cur = 0;

// Number of TX buffers available (set to buffer_count / 2 in benchmark_init)
static size_t   buf_count;

/** True when the "elb_server=1" argument was given */
bool is_server = false;
/** Name of the current role: "client" by default, "server" if is_server */
static char *app_type = "client";

/** TSC value taken by the client right before it queued its latest packet */
static uint64_t sent_at;
/** TSC value taken by the client right before its first iteration */
static uint64_t started_at;
/** Bounds passed to ram_set_affinity(); -1ULL means "not given yet" */
static uint64_t minbase = -1ULL;
static uint64_t maxbase = -1ULL;
/** Set once ram_set_affinity() has been called, so it is called only once */
static bool affinity_set = false;

#define MAX_PAYLOAD 1500
/** Size of payload for ethernet packets in benchmark */
static size_t payload_size = 64;

/** Specifies whether the data should be read by the client */
static bool read_incoming = false;

/** Specifies whether a permutation should be used or just a linear scan.
 *  No command line argument visible in this file changes it. */
static bool read_linear = true;

/** Will be initialized with a permutation for touching the packet content */
static uint16_t read_permutation[MAX_PAYLOAD];

// The card name provided on the command line ("cardname=")
static char *cardname = "e10k";

// The queue id requested on the command line ("queue=")
static uint64_t qi = 0;

/** Number of benchmark runs handed to bench_ctl_init() */
static size_t total_runs = 10000;

/** Number of dry runs before we start benchmarking */
static size_t dry_runs = 100;

/** Specifies whether the time for each run should be dumped */
static bool dump_each_run = false;

/** Specifies if NOCACHE should be used for mapping the buffers */
static bool use_nocache = false;

/** Prefix for outputting the results */
static const char *out_prefix = "";


/** Benchmark control handle */
bench_ctl_t *bench_ctl = NULL;


#if TRACE_ONLY_LLNET
/** Receives the textual trace dump printed when the benchmark finishes */
char trbuf[16*1024*1024];
#endif // TRACE_ONLY_LLNET
87
88
89
90/** Generate a permutation for touching the packet contents */
91static void create_read_permutation(void)
92{
93    uint16_t i;
94    uint16_t j;
95    uint16_t tmp;
96
97    for (i = 0; i < payload_size; i++) {
98        read_permutation[i] = i;
99    }
100
101    srand(rdtsc());
102
103    // Use fisher-yates shuffle
104    for (i = payload_size - 1; i >= 1; i--) {
105        j = rand() % (i + 1);
106
107        tmp = read_permutation[i];
108        read_permutation[i] = read_permutation[j];
109        read_permutation[j] = tmp;
110    }
111}
112
113
114
115
116void benchmark_init(void)
117{
118    errval_t err;
119    int i;
120
121    net_if_init(cardname, qi);
122
123    buf_count = buffer_count / 2;
124
125    assert(buf_count >= 8);
126
127    err = sys_debug_get_tsc_per_ms(&tscperms);
128    assert(err_is_ok(err));
129
130
131    // If desired, create permutation for accessing incoming data
132    if (read_incoming && !read_linear) {
133        create_read_permutation();
134    }
135
136    // Initialize benchmark control
137    bench_ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, total_runs);
138    bench_ctl_dry_runs(bench_ctl, dry_runs);
139
140    // Register a bunch of buffers to avoid race conditions
141    for (i = 0; i < 8; i++) {
142        errval_t err3 = buffer_rx_add(buf_count + i);
143        assert(err3 != SYS_ERR_OK);
144        if (err3 != SYS_ERR_OK) {
145            printf("elb: failed to register buffers...\n");
146            abort();
147        }
148    }
149
150    if (is_server) {
151        printf("elb: Starting benchmark server...\n");
152    } else {
153        printf("elb: Starting benchmark client...\n");
154
155
156#if TRACE_ONLY_LLNET
157        assert(err_is_ok(err));
158        err = trace_control(TRACE_EVENT(TRACE_SUBSYS_LLNET,
159                                        TRACE_EVENT_LLNET_START, 0),
160                            TRACE_EVENT(TRACE_SUBSYS_LLNET,
161                                        TRACE_EVENT_LLNET_STOP, 0),
162                            0);
163        assert(err_is_ok(err));
164        trace_event(TRACE_SUBSYS_LLNET, TRACE_EVENT_LLNET_START, 0);
165#endif // TRACE_ONLY_LLNET
166
167
168
169        started_at = rdtsc();
170        start_next_iteration();
171    }
172
173}
174
175void benchmark_argument(char *arg)
176{
177    if (!strcmp(arg, "elb_server=1")) {
178        is_server = true;
179        app_type = "server";
180    } else if (!strncmp(arg, "runs=", strlen("runs="))) {
181        total_runs = atol(arg + strlen("runs="));
182    } else if (!strncmp(arg, "dry_runs=", strlen("dry_runs="))) {
183        dry_runs = atol(arg + strlen("dry_runs="));
184    } else if (!strncmp(arg, "payload_size=", strlen("payload_size="))) {
185        payload_size = atol(arg + strlen("payload_size="));
186        if (payload_size < 46) {
187            printf("elb: Payload size too small (must be at least 46), has "
188                    "been extended to 46!\n");
189            payload_size = 46;
190        } else if (payload_size > MAX_PAYLOAD) {
191            printf("elb: Payload size too big (must be at most 1500), has "
192                    "been limited to 1500!\n");
193            payload_size = 1500;
194        }
195    } else if (!strncmp(arg, "elp_outprefix=", strlen("elp_outprefix="))) {
196        out_prefix = arg + strlen("elp_outprefix=");
197    } else if (!strncmp(arg, "elb_nocache=", strlen("elb_nocache="))) {
198        use_nocache = !!atol(arg + strlen("elb_nocache="));
199    } else if (!strncmp(arg, "read_incoming=", strlen("read_incoming="))) {
200        read_incoming = !!atol(arg + strlen("read_incoming="));
201    } else if (!strncmp(arg, "dump_each=", strlen("dump_each="))) {
202        dump_each_run = !!atol(arg + strlen("dump_each="));
203    } else if (!strncmp(arg, "affinitymin=", strlen("affinitymin="))) {
204        minbase = atol(arg + strlen("affinitymin="));
205    } else if(!strncmp(arg, "affinitymax=", strlen("affinitymax="))) {
206        maxbase = atol(arg + strlen("affinitymax="));
207    } else if(!strncmp(arg, "cardname=", strlen("cardname="))) {
208        cardname = arg + strlen("cardname=");
209    } else if(!strncmp(arg, "queue=", strlen("queue="))) {
210        qi = atol(arg + strlen("queue="));
211    } else {
212        printf("Invalid command line argument [%s]\n", arg);
213        abort();
214    }
215
216    if (!affinity_set && minbase != -1ULL && maxbase != -1ULL) {
217        ram_set_affinity(minbase, maxbase);
218        affinity_set = true;
219    }
220}
221
// Returns the card name: the value of the "cardname=" command line argument
// if one was given, otherwise the built-in default. The returned pointer is
// not a copy; it refers to the argument string or the default literal.
char *get_cardname(void)
{
    return cardname;
}
227
// Returns the queue id: the value of the "queue=" command line argument if
// one was given, otherwise 0.
uint64_t get_cmdline_queueid(void)
{
    return qi;
}
233
/**
 * Hook for deferred work. This benchmark has none, so the function
 * intentionally does nothing.
 */
void benchmark_do_pending_work(void)
{
    /* nothing to do */
}
238
239void benchmark_rx_done(size_t idx, size_t pkt_len, uint64_t more,
240                       uint64_t flags)
241{
242    static bool first = true;
243    if (is_server) {
244        respond_buffer(idx, pkt_len);
245    } else {
246        // Touch data if desired
247        if (read_incoming) {
248            struct ethernet_frame* frame = buffer_address(idx);
249            volatile uint8_t* b = frame->payload;
250            size_t i;
251            size_t acc = 0; // FIXME: compiler might optimize out this code
252            if (read_linear) {
253                for (i = 0; i< payload_size; i++) {
254                    acc += b[i];
255                }
256            } else {
257                for (i = 0; i < payload_size; i++) {
258                    acc += b[read_permutation[i]];
259                }
260            }
261        }
262
263        cycles_t tsc = rdtsc();
264        cycles_t result[1] = {
265            tsc - sent_at,
266        };
267
268#if TRACE_ONLY_LLNET
269        trace_event(TRACE_SUBSYS_LLNET, TRACE_EVENT_LLNET_APPRX, 0);
270#endif // TRACE_ONLY_LLNET
271
272        if (first) {
273            printf("elb: First response received\n");
274            first = false;
275        }
276
277        // Reregister rx buffer
278        errval_t err = buffer_rx_add(idx);
279        if (err != SYS_ERR_OK) {
280            printf("Could not add buffer in RX ring\n");
281            abort();
282        }
283
284        if (bench_ctl_add_run(bench_ctl, result)) {
285            uint64_t tscperus = tscperms / 1000;
286            printf("cycles per us %"PRIu64"\n", tscperus);
287
288            // Output our results
289            bench_ctl_dump_csv_bincounting(bench_ctl, 0, 100, 9 * tscperus,
290                    25 * tscperus, out_prefix, tscperus);
291
292            bench_ctl_dump_analysis(bench_ctl, 0,  out_prefix, tscperus);
293            //bench_ctl_dump_csv(bench_ctl, out_prefix, tscperus);
294
295#if TRACE_ONLY_LLNET
296            trace_event(TRACE_SUBSYS_LLNET, TRACE_EVENT_LLNET_STOP, 0);
297            size_t trsz = trace_dump(trbuf, sizeof(trbuf) - 1, NULL);
298            trbuf[trsz] = 0;
299            printf("\n\n\n\nTrace results:\n%s\n\n\n", trbuf);
300#endif // TRACE_ONLY_LLNET
301
302            bench_ctl_destroy(bench_ctl);
303            terminate_benchmark();
304        } else {
305            start_next_iteration();
306        }
307    }
308
309} // end function: benchmark_rx_done
310
311void benchmark_tx_done(size_t idx)
312{
313    if (is_server) {
314        // Reregister rx buffer
315        errval_t err = buffer_rx_add(idx);
316        if (err != SYS_ERR_OK) {
317            printf("Could not add buffer in RX ring\n");
318            abort();
319        }
320
321    }
322}
323
/**
 * Start one client round trip: emit the APPTX trace event when tracing is
 * compiled in, then send a packet.
 */
static void start_next_iteration(void)
{
#if TRACE_ONLY_LLNET
    trace_event(TRACE_SUBSYS_LLNET, TRACE_EVENT_LLNET_APPTX, 0);
#endif // TRACE_ONLY_LLNET

    client_send_packet();
}
332
333static void client_send_packet(void)
334{
335    struct ethernet_frame *frame;
336    const char bcast[6] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
337    size_t len = sizeof(*frame) + payload_size;
338    size_t idx = (buf_cur + 1) % buf_count;
339
340    frame = buffer_address(idx);
341    //memcpy(frame->src_mac, our_mac, 6);
342    memcpy(frame->src_mac, bcast, 6);
343    memcpy(frame->dst_mac, bcast, 6);
344    frame->ethertype = 0x0608;
345    sent_at = rdtsc();
346    errval_t err = buffer_tx_add(idx, 0, len, 0, 0);
347    if (err != SYS_ERR_OK) {
348        printf("Could not add buffer for TX\n");
349        assert(err != SYS_ERR_OK);
350        abort();
351    }
352}
353
354static void respond_buffer(size_t i, size_t len)
355{
356    /*struct ethernet_frame *frame = buf_virt[i];
357    const char bcast[6] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
358
359    memcpy(frame->src_mac, our_mac, 6);
360    memcpy(frame->dst_mac, bcast, 6);*/
361
362    errval_t err = buffer_tx_add(i, 0, len, 0, 0);
363    if (err != SYS_ERR_OK) {
364        printf("Could not add buffer for TX\n");
365        assert(err != SYS_ERR_OK);
366        abort();
367    }
368}
369
370