1/**
2 * \file
3 * \brief User-space Microbenchmarks.
4 */
5
6/*
7 * Copyright (c) 2007, 2008, 2009, 2010, 2012, ETH Zurich.
8 * All rights reserved.
9 *
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
13 */
14
15#include <stdio.h>
16#include <stdint.h>
17#include <inttypes.h>
18#include <barrelfish/barrelfish.h>
19#include <barrelfish/dispatch.h>
20
21/* #define MINIMAL_BARRELFISH */
22
23#ifndef BENCH_POSIX
24#       include <barrelfish/barrelfish.h>
25#       include <barrelfish_kpi/syscalls.h>
26#       include <barrelfish_kpi/types.h>
27#       include <barrelfish/syscalls.h>
28#       include <barrelfish/sys_debug.h>
29// #       include <barrelfish/backwards/idc.h>
30#       include <stdarg.h>
31#       include <string.h>
32#else
33#       include <sys/types.h>
34#       include <unistd.h>
35#endif
36
37#define MICROBENCH_ITERATIONS 1000000
38
39typedef void (*Benchmark)(void);
40
41#ifdef BENCH_POSIX
42
/**
 * \brief Read the processor's time-stamp counter with the RDTSC instruction.
 *
 * The instruction delivers its result in two halves, one in EAX and one in
 * EDX. Each half is captured in a 32-bit variable so that the "a" and "d"
 * register constraints match the operand width on 32-bit as well as 64-bit
 * x86 targets; the halves are then combined with EDX as the upper 32 bits.
 *
 * \return The combined 64-bit counter value.
 */
static inline uint64_t rdtsc(void)
{
    uint32_t lo, hi;

    __asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));

    // Widen before shifting: shifting a 32-bit value by 32 would be undefined.
    return ((uint64_t)hi << 32) | lo;
}
49
50#else
51
/**
 * \brief Dummy mcount() that does nothing. For profiling.
 *
 * Marked no_instrument_function so it is itself never instrumented, and
 * unused so that builds which never reference it stay warning-free.
 */
static void __attribute__ ((no_instrument_function, unused)) mcount(void)
{
    /* intentionally empty */
}
56
57#ifdef MINIMAL_BARRELFISH
58
/**
 * \brief printf() stand-in that formats into a fixed-size stack buffer and
 *        hands the result to sys_print().
 *
 * Output that does not fit into the 256-byte buffer is truncated. If
 * formatting fails, nothing is printed.
 *
 * \param fmt printf-style format string, followed by the matching arguments.
 *
 * \return The value returned by vsnprintf(): the length the complete output
 *         would have had, or a negative value on a formatting error.
 */
static int myprintf(const char *fmt, ...)
{
    char        str[256];
    va_list     argptr;
    int         ret;

    va_start(argptr, fmt);
    ret = vsnprintf(str, sizeof(str), fmt, argptr);
    va_end(argptr);

    if (ret < 0) {
        // The buffer contents are unspecified after a failure; do not scan
        // or print them.
        return ret;
    }

    // vsnprintf() reports the untruncated length, so clamp it to what was
    // actually stored (excluding the terminating NUL).
    size_t len = (size_t)ret < sizeof(str) ? (size_t)ret : sizeof(str) - 1;
    sys_print(str, len);

    return ret;
}
72
73#       define printf  myprintf
74
75#endif
76
77#endif
78
/**
 * \brief Unsigned division rounded to the nearest integer (halves round up).
 *
 * \param quotient Value to be divided (i.e. the dividend, despite its name).
 * \param divisor  Value to divide by. Must be non-zero.
 *
 * \return quotient / divisor, rounded to the nearest integer.
 */
static uint64_t
divide_round(uint64_t quotient, uint64_t divisor)
{
    uint64_t whole = quotient / divisor;
    uint64_t rest = quotient % divisor;

    // Equivalent to "rest * 2 >= divisor", but cannot wrap around: rest is
    // always smaller than divisor, so the subtraction stays in range even
    // for divisors above 2^63.
    if (rest >= divisor - rest) {
        // round up
        return whole + 1;
    }

    return whole;
}
89
90static inline void print_result(uint64_t result)
91{
92    printf("%" PRIu64 " ticks. Done %u iterations in %" PRIu64 " ticks.\n",
93           divide_round(result, MICROBENCH_ITERATIONS), MICROBENCH_ITERATIONS,
94           result);
95}
96
97static void syscall_benchmark(void)
98{
99    for(int i = 0; i < MICROBENCH_ITERATIONS; i++) {
100#ifndef BENCH_POSIX
101        sys_nop();
102#else
103        getpid();
104#endif
105    }
106}
107
108static void rdtsc_benchmark(void)
109{
110    for(int i = 0; i < MICROBENCH_ITERATIONS; i++) {
111        rdtsc();
112    }
113}
114
115static void xchg_benchmark(void)
116{
117    uint32_t mem = 1, reg = 7;
118
119    for(int i = 0; i < MICROBENCH_ITERATIONS; i++) {
120        __asm__ __volatile__(
121            "xchg %0, %1"
122            : "=a" (reg), "=m" (mem)
123            : "a" (reg));
124    }
125}
126
127static void cmpxchg_benchmark(void)
128{
129    uint32_t src = 5, dest = 5, reg = 5;
130
131    for(int i = 0; i < MICROBENCH_ITERATIONS; i++) {
132        __asm__ __volatile__(
133            "lock cmpxchg %1, %2"
134            : "=a" (reg)
135            : "q"(src), "m"(dest), "0"(reg)
136            : "memory");
137    }
138}
139
140static void dec_benchmark(void)
141{
142    uint32_t val = 1000000;
143
144    for(int i = 0; i < MICROBENCH_ITERATIONS; i++) {
145        __asm__ __volatile__(
146            "lock dec{l} %0"
147            : "+m" (val)
148            :
149            : "memory");
150    }
151}
152
153#if 0
154
155/*
156 * XXX: These two benchmarks are not implemented safely (xor'ing rdx,
157 * going back to C and then assuming rdx is still zero doesn't work)
158 * and do not currently compile this way. I'm disabling them until
159 * someone fixes them.
160 */
161
162static void bts_benchmark(void)
163{
164    uint64_t mem = 3;
165
166    __asm__ __volatile__ ("xor %%rdx, %%rdx" ::);
167
168    for(int i = 0; i < MICROBENCH_ITERATIONS; i++) {
169        __asm__ __volatile__ (
170            "lock bts %%rdx,%0"
171            : /* no output */
172            : "m" (mem)
173            : "%rdx");
174    }
175}
176
177static void bts_clr_benchmark(void)
178{
179    uint64_t mem = 3;
180
181    __asm__ __volatile__("xor %%rdx,%%rdx");
182
183    for(int i = 0; i < MICROBENCH_ITERATIONS; i++) {
184        __asm__ __volatile__ (
185            "lock bts %%rdx,%0\n\t"
186            "movq $0,%0\n\t"
187            : /* no output */
188            : "m" (mem)
189            : "%rdx");
190    }
191}
192
193#endif
194
195static void clr_benchmark(void)
196{
197    uint64_t mem = 0;
198
199    for(int i = 0; i < MICROBENCH_ITERATIONS; i++) {
200        __asm__ __volatile__ (
201            "movl $0,%0\n\t"
202            : /* no output */
203            : "m" (mem)
204            : "%rax");
205    }
206}
207
208#ifndef BENCH_POSIX
209
210#if 0
211static void rdtscp_benchmark(void)
212{
213    for(int i = 0; i < MICROBENCH_ITERATIONS; i++) {
214        rdtscp();
215    }
216}
217#endif
218
219#if 0   // XXX: update to new IDC system to make it work
220static struct capref capaddr;
221
222static void capinvoke_benchmark(void)
223{
224    struct idc_send_msg msg;
225
226    idc_msg_init(&msg);
227
228    for(int i = 0; i < MICROBENCH_ITERATIONS; i++) {
229        cap_invoke(capaddr, &msg);
230    }
231}
232
233static void idc_benchmark(void)
234{
235    struct idc_send_msg msg;
236    struct idc_recv_msg rmsg;
237    struct idc_endpoint *ep;
238    struct capref epcap;
239
240    int r = endpoint_create(NULL, NULL, DEFAULT_IDC_BUF_WORDS, &epcap, &ep);
241    assert(r == 0);
242
243    idc_msg_init(&msg);
244
245    uint64_t minres = 0, maxres = 0, sum = 0;
246    for(int i = 0; i < 1000; i++) {
247        uint64_t start = rdtsc();
248        cap_invoke(epcap, &msg);
249        messages_idc_wait(ep);
250        idc_endpoint_poll(ep, &rmsg, NULL);
251        uint64_t duration = rdtsc() - start;
252
253        sum += duration;
254        if(minres == 0 && maxres == 0) {
255            minres = maxres = duration;
256        } else {
257            if(duration < minres) {
258                minres = duration;
259            } else {
260                if(duration > maxres) {
261                    maxres = duration;
262                }
263            }
264        }
265    }
266
267    uint64_t variance = maxres - minres;
268    printf("min: %" PRIu64 " ticks, max: %" PRIu64 " ticks, "
269           "variance: %" PRIu64 " ticks, avg: %" PRIu64 " ticks\n",
270           minres, maxres, variance, divide_round(sum, 1000));
271}
272#endif
273
274#endif
275
276static uint64_t benchmark(Benchmark bench)
277{
278    uint64_t start, end;
279
280    start = rdtsc();
281    bench();
282    end = rdtsc();
283
284    return end - start;
285}
286
287int main(int argc, char *argv[])
288{
289    printf("bench running on core %d.\n", disp_get_core_id());
290
291    printf("NOP system call: ");
292    print_result(benchmark(syscall_benchmark));
293
294    printf("RDTSC instruction: ");
295    print_result(benchmark(rdtsc_benchmark));
296
297#ifndef BENCH_POSIX
298//    printf("RDTSCP instruction: ");
299//    print_result(benchmark(rdtscp_benchmark));
300
301    printf("XCHG instruction: ");
302    print_result(benchmark(xchg_benchmark));
303
304    printf("LOCK CMPXCHG instruction: ");
305    print_result(benchmark(cmpxchg_benchmark));
306
307    printf("LOCK DEC instruction: ");
308    print_result(benchmark(dec_benchmark));
309
310#if 0
311    printf("atomic test & set: ");
312    print_result(benchmark(bts_benchmark));
313
314    printf("atomic test & set when cleared: ");
315    print_result(benchmark(bts_clr_benchmark));
316#endif
317
318    printf("clear memory: ");
319    print_result(benchmark(clr_benchmark));
320
321#if 0 /* FIXME: change this code to avoid hardcoded caddr manipulation! */
322    printf("NULL cap invocation (1 level deep): ");
323    // 4th entry in rootcn
324    capaddr = 4U << 26;
325    print_result(benchmark(capinvoke_benchmark));
326
327    printf("NULL cap invocation (2 levels deep): ");
328    // last (64th) entry in taskdircn
329    capaddr = 63U << 6;
330    print_result(benchmark(capinvoke_benchmark));
331#endif
332
333#if 0   // XXX: update to new IDC system to make it work
334    printf("NULL cap invocation (3 levels deep): ");
335    // 1st entry in taskcn
336    //capaddr = CPTR_NULL;
337    print_result(benchmark(capinvoke_benchmark));
338
339    printf("Local IDC: ");
340    idc_benchmark();
341#endif
342
343//     printf("Async IDC: %lu ticks\n",
344//            divide_round(benchmark(asyncidc_benchmark), 2));
345
346/*     printf("Local IDC w/ cap transfer: "); */
347/*     localcaptransfer_benchmark(); */
348#endif
349
350    printf("End of benchmarks.\n");
351    return 0;
352}
353