/**
 * \file
 * \brief User and kernel code definitions for system-wide tracing
 */

/*
 * Copyright (c) 2007-2010, ETH Zurich.
 * Copyright (c) 2015, Hewlett Packard Enterprise Development LP.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
 */

#ifndef LIBBARRELFISH_TRACE_H
#define LIBBARRELFISH_TRACE_H

#if defined(__x86_64__)
#define TRACING_EXISTS 1
#endif


#ifndef IN_KERNEL
/* XXX: private includes from libbarrelfish */
#include <barrelfish/dispatcher_arch.h>
#include <barrelfish/curdispatcher_arch.h>
#else // IN_KERNEL
#if defined(__x86_64__) || defined(__i386__)
#include <arch/x86/apic.h> // XXX!
#endif // __x86_64__ || __i386__
#endif // IN_KERNEL

#include <barrelfish/sys_debug.h>
#include <barrelfish/waitset.h> // struct event_closure
#include <barrelfish/domain.h>

#include <stdio.h>
#include <string.h> // memcpy

/*
 * Turn some tracing on or off
 */
#define TRACE_THREADS
#define TRACE_CSWITCH
//#define NETWORK_LLSTACK_TRACE 1

/* Trace only network-related events.
 * This reduces the number of events recorded and hence allows
 * recording for a longer time. */
#if CONFIG_TRACE && NETWORK_STACK_TRACE
#define TRACE_ONLY_SUB_NET 1
#endif // CONFIG_TRACE && NETWORK_STACK_TRACE

#if CONFIG_TRACE && NETWORK_LLSTACK_TRACE
#define TRACE_ONLY_LLNET 1
#endif // CONFIG_TRACE && NETWORK_LLSTACK_TRACE


#define CONSOLE_DUMP_BUFLEN (2<<20)

/**
 * The constants for the subsystems and events are generated from the file
 * trace_definitions/trace_defs.pleco and, once compiled, can be included
 * with #include <trace_definitions/trace_defs.h>.
 */
#include <trace_definitions/trace_defs.h>


#define TRACE_EVENT(s,e,a) ((uint64_t)(s)<<48|(uint64_t)(e)<<32|(a))
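
/*
 * Worked example (illustrative only): with subsystem 0x0001, event 0x0002 and
 * argument 0x00000003,
 *
 *   TRACE_EVENT(0x0001, 0x0002, 0x00000003) == 0x0001000200000003ULL
 *
 * i.e. the subsystem lands in bits 63..48, the event in bits 47..32 and the
 * argument in bits 31..0.
 */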

/* XXX: this is a temp kludge. The tracing code wants to allocate a fixed buffer
 * for every possible core ID, but this is now too large for sanity, so I've
 * limited it here. -AB 20111229
 */

struct trace_buffer;

#define TRACE_COREID_LIMIT        32
#define TRACE_EVENT_SIZE          16
#define TRACE_MAX_EVENTS          20000        // max number of events
#define TRACE_MAX_APPLICATIONS    128
//#define TRACE_PERCORE_BUF_SIZE    0x1ff00
#define TRACE_PERCORE_BUF_SIZE    (TRACE_EVENT_SIZE * TRACE_MAX_EVENTS + (sizeof (struct trace_buffer)))

#define TRACE_BUF_SIZE (TRACE_COREID_LIMIT*TRACE_PERCORE_BUF_SIZE)    // Size of all trace buffers

// Size of the array storing which subsystems are enabled
#define TRACE_SUBSYS_ENABLED_BUF_SIZE (TRACE_NUM_SUBSYSTEMS * sizeof(bool))

#define TRACE_ALLOC_SIZE (TRACE_BUF_SIZE + TRACE_SUBSYS_ENABLED_BUF_SIZE)
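
/*
 * Sketch of the layout implied by the sizes above and by
 * compute_trace_buf_addr()/trace_is_subsys_enabled() below (per-core buffers
 * laid out back to back, core 0 first, followed by the enable flags):
 *
 *   base + 0 * TRACE_PERCORE_BUF_SIZE   core 0 struct trace_buffer
 *   base + 1 * TRACE_PERCORE_BUF_SIZE   core 1 struct trace_buffer
 *   ...
 *   base + TRACE_BUF_SIZE               bool[TRACE_NUM_SUBSYSTEMS] enable flags
 */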

#define TRACE_MAX_BOOT_APPLICATIONS 16

// A macro to simplify calling of trace_event
// e.g., do TRACE(BENCH, START, 0)
// instead of
// trace_event(TRACE_SUBSYS_BENCH, TRACE_EVENT_BENCH_START, 0)
#define TRACE(s, e, a) trace_event(TRACE_SUBSYS_##s, TRACE_EVENT_##s##_##e, a)

// This is useful for trace points that want to use a cte pointer as the trace
// argument.  This could be wrapped in an IN_KERNEL #ifdef block, but then
// we would have to special case TRACE-points in libmdb userspace builds.
struct cte;
static inline uint32_t cte_to_trace_id(struct cte *cte)
{
    uintptr_t cteptr = (uintptr_t)cte;
    // cut off zero bits from struct size
    cteptr >>= OBJBITS_CTE;
    // mask off uninteresting high bits (29) and msb three bits of kernel
    // address space. This will map 8 ctes onto the same trace id, but
    // collisions should be fairly rare.
    return cteptr & MASK(32);
}
#define TRACE_CTE(s, e, a) TRACE(s, e, cte_to_trace_id(a))
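
/*
 * Illustrative use (a sketch, not part of the API; SUBSYS/EVENT stand for any
 * subsystem/event pair generated into trace_definitions/trace_defs.h):
 *
 *   TRACE(BENCH, START, 0);      // trace_event(TRACE_SUBSYS_BENCH, TRACE_EVENT_BENCH_START, 0)
 *   TRACE_CTE(SUBSYS, EVENT, c); // as above, but logs cte_to_trace_id(c) for a 'struct cte *c'
 */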

#if defined(__x86_64__)
// for rdtsc()
#include <arch/x86/barrelfish_kpi/asm_inlines_arch.h>

#define TRACE_TIMESTAMP() rdtsc()

/**
 * \brief Compare and set: if the value at address equals old, set it to the
 *        new value and return true; otherwise do not write to address and
 *        return false.
 *
 * NOTE: This is only used by threads on the same core, so no lock prefix is needed.
 */
static inline bool trace_cas(volatile uintptr_t *address, uintptr_t old,
                             uintptr_t nw)
{
    register bool res;
    __asm volatile("cmpxchgq %2,%0     \n\t"
                   "setz %1            \n\t"
                   : "+m" (*address), "=q" (res)
                   : "r" (nw), "a" (old)
                   : "memory");
    return res;
}


#elif defined(__i386__) || defined(__arm__) || defined(__aarch64__)

// Stub: TRACING_EXISTS is only defined for x86_64 above, so on these
// architectures tracing is compiled out and this CAS is never used to
// reserve trace slots.
static inline bool trace_cas(volatile uintptr_t *address, uintptr_t old,
                             uintptr_t nw)
{
    return false;
}

#define TRACE_TIMESTAMP() 0

#else

#warning You need to supply CAS and a timestamp function for this architecture.

#endif


/// Trace event
struct trace_event {
    uint64_t timestamp;
    union {
        uint64_t raw;
        // ... stuff that is embedded in the event
        struct {
            uint32_t word1;
            uint32_t word2;
        } w32;
        struct {
            uint32_t arg;
            uint16_t event;
            uint16_t subsystem;
        } ev;
    } u;
};
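
/*
 * Layout note (an observation, assuming a little-endian target such as
 * x86-64): in the 'ev' view, arg occupies the low 32 bits, event bits 47..32
 * and subsystem bits 63..48 of the 64-bit word, which matches the packing
 * produced by TRACE_EVENT() above. This is why trace_write_event() can
 * compare u.raw directly against the start/stop trigger values.
 */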

/// Trace information about an application
struct trace_application {
    char name[8]; ///< Name of the application
    uint64_t dcb; ///< DCB address of the application
};

/// Trace buffer
struct trace_buffer {
    volatile uintptr_t head_index;
    volatile uintptr_t tail_index;

    // ... flags...
    struct trace_buffer *master;       // Pointer to the trace master
    volatile bool     running;
    volatile bool     autoflush;       // Are we flushing automatically?
    volatile uint64_t start_trigger;
    volatile uint64_t stop_trigger;
    volatile uint64_t stop_time;
    int64_t           t_offset;        // Time offset relative to core 0
    uint64_t          t0;              // Start time of trace
    uint64_t          duration;        // Max trace duration
    uint64_t          event_counter;   // Max number of events in trace

    // ... events ...
    struct trace_event events[TRACE_MAX_EVENTS];

    // ... applications ...
    volatile uint8_t num_applications;
    struct trace_application applications[TRACE_MAX_APPLICATIONS];
};

typedef errval_t (* trace_conditional_termination_t)(bool forced);

static __attribute__((unused)) trace_conditional_termination_t
    cond_termination = NULL;

#ifndef IN_KERNEL

extern lvaddr_t trace_buffer_master;
extern lvaddr_t trace_buffer_va;
struct cnoderef;

errval_t trace_init(void);
errval_t trace_disable_domain(void);
void trace_reset_buffer(void);
void trace_reset_all(void);
errval_t trace_setup_on_core(struct capref *retcap);
errval_t trace_setup_child(struct cnoderef taskcn,
                           dispatcher_handle_t handle);
errval_t trace_control(uint64_t start_trigger,
                       uint64_t stop_trigger,
                       uint64_t duration);
errval_t trace_control_fixed_events_counter(uint64_t start_trigger,
                       uint64_t stop_trigger,
                       uint64_t duration,
                       uint64_t event_counter);
errval_t trace_wait(void);
size_t trace_get_event_count(coreid_t specified_core);
errval_t trace_conditional_termination(bool forced);
size_t trace_dump(char *buf, size_t buflen, int *number_of_events);
size_t trace_dump_core(char *buf, size_t buflen, size_t *usedBytes,
        int *number_of_events_dumped, coreid_t specified_core,
        bool first_dump, bool isOnlyOne);
void trace_flush(struct event_closure callback);
void trace_set_autoflush(bool enabled);
errval_t trace_prepare(struct event_closure callback);
errval_t trace_my_setup(void);

errval_t trace_set_subsys_enabled(uint16_t subsys, bool enabled);
errval_t trace_set_all_subsys_enabled(bool enabled);



/**
 * \brief Compute the fixed trace buffer address for a given core_id.
 *
 * Each core gets its own top-level page table entry, so to use a fixed
 * address we need to compute it as an offset from the core ID.
 *
 * Once we have computed this for a domain, we store it in the dispatcher.
 */
static inline lvaddr_t compute_trace_buf_addr(uint8_t core_id)
{
    assert(core_id < TRACE_COREID_LIMIT);
    lvaddr_t addr = trace_buffer_master + (core_id * TRACE_PERCORE_BUF_SIZE);

    return addr;
}
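
/*
 * For example (illustrative): core_id == 2 yields
 * trace_buffer_master + 2 * TRACE_PERCORE_BUF_SIZE, i.e. the third per-core
 * region in the layout sketched above.
 */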


static inline void set_cond_termination(trace_conditional_termination_t f_ptr)
{
    cond_termination = f_ptr;
}

#endif // NOT IN_KERNEL

void trace_init_disp(void);

/**
 * \brief Reserve a slot in the trace buffer and write the event.
 *
 * Returns the slot index that was written.
 * Lock-free implementation.
 */
static inline uintptr_t
trace_reserve_and_fill_slot(struct trace_event *ev,
                            struct trace_buffer *buf)
{
    uintptr_t i, nw;
    struct trace_event *slot;

    do {
        i = buf->head_index;

        if (buf->tail_index - buf->head_index == 1 ||
                (buf->tail_index == 0 && (buf->head_index == TRACE_MAX_EVENTS-1))) {
            // Buffer is full, overwrite last event
            break;
        }

        nw = (i + 1) % TRACE_MAX_EVENTS;

    } while (!trace_cas(&buf->head_index, i, nw));

    // Write the event
    slot = &buf->events[i];
    *slot = *ev;

    return i;
}

/**
 * \brief Write a trace event to the buffer for the current core.
 *
 * Tracing must have been set up by the parent of the current domain
 * (by calling trace_setup_child).
 *
 * The per-core buffer must have already been initialized by
 * the monitor (by calling trace_setup_on_core).
 */

#ifndef IN_KERNEL

static inline coreid_t get_my_core_id(void)
{
    // FIXME: This call is not safe; figure out a better way to do this.
    // WARNING: Be very careful about using get_my_core_id, as it relies on
    // the dispatcher pointers, which do not work in disabled mode, e.g. when
    // you are about to return to the kernel with sys_yield.
    return disp_get_core_id();
}
#endif // !IN_KERNEL



#ifdef IN_KERNEL

static inline coreid_t get_my_core_id(void)
{
    return my_core_id;
}

// Kernel version: uses the global trace buffer variable
static inline errval_t trace_write_event(struct trace_event *ev)
{
#ifdef TRACING_EXISTS
    struct trace_buffer *master = (struct trace_buffer *)kernel_trace_buf;

    if (kernel_trace_buf == 0 || my_core_id >= TRACE_COREID_LIMIT) {
        return TRACE_ERR_NO_BUFFER;
    }

    if (!master->running) {
        if (ev->u.raw == master->start_trigger) {
            master->start_trigger = 0;
            master->t0 = ev->timestamp;
            if (master->duration) {
                master->stop_time = ev->timestamp + master->duration;
            }
            master->running = true;
        } else {
            return SYS_ERR_OK;
        }
    }
    struct trace_buffer *trace_buf = (struct trace_buffer*) (kernel_trace_buf
            + my_core_id * TRACE_PERCORE_BUF_SIZE);
    (void) trace_reserve_and_fill_slot(ev, trace_buf);

    if (ev->u.raw == master->stop_trigger ||
            (ev->timestamp>>63 == 0 &&  // Not a DCB event
             ev->timestamp > master->stop_time)) {
        master->stop_trigger = 0;
        master->running = false;
    }
#endif // TRACING_EXISTS
    return SYS_ERR_OK;
}

// Call this function when a new application has been created.
// dcb: pointer to the domain control block struct of the application.
static inline errval_t trace_new_application(char *new_application_name, uintptr_t dcb)
{
#ifdef TRACING_EXISTS

    if (kernel_trace_buf == 0 || my_core_id >= TRACE_COREID_LIMIT) {
        return TRACE_ERR_NO_BUFFER;
    }

    struct trace_buffer *trace_buf = (struct trace_buffer*) (kernel_trace_buf
            + my_core_id * TRACE_PERCORE_BUF_SIZE);

    int i;
    int new_value;
    do {
        i = trace_buf->num_applications;

        if (i == TRACE_MAX_APPLICATIONS)
            return -1; // TODO error code

        new_value = i + 1;

    } while (!trace_cas((uintptr_t*)&trace_buf->num_applications, i, new_value));

    trace_buf->applications[i].dcb = (uint64_t) dcb;
    memcpy(&trace_buf->applications[i].name, new_application_name, 8);

#endif // TRACING_EXISTS
    return SYS_ERR_OK;
}

// During boot of a core the trace buffer is not yet mapped, but we still want
// to record the applications that are started at this time. This would be
// fixed if the kernel were responsible for mapping the trace buffer, but
// currently that is the monitor's job.

extern struct trace_application kernel_trace_boot_applications[];
extern int kernel_trace_num_boot_applications;

static inline void trace_new_boot_application(char* name, uintptr_t dcb)
{
#if defined(TRACING_EXISTS)
    if (kernel_trace_num_boot_applications < TRACE_MAX_BOOT_APPLICATIONS) {

        kernel_trace_boot_applications[kernel_trace_num_boot_applications].dcb = (uint64_t) dcb;
        memcpy(kernel_trace_boot_applications[kernel_trace_num_boot_applications].name, name, 8);

        kernel_trace_num_boot_applications++;
    }
#endif
}

static inline void trace_copy_boot_applications(void)
{
#if defined(TRACING_EXISTS)
    for (int i = 0; i < kernel_trace_num_boot_applications; i++) {
        trace_new_application(kernel_trace_boot_applications[i].name,
                              kernel_trace_boot_applications[i].dcb);
    }
#endif
}
#else // !IN_KERNEL

// User-space version: gets trace buffer pointer out of the current dispatcher
static inline errval_t trace_write_event(struct trace_event *ev)
{
#ifdef TRACING_EXISTS
    dispatcher_handle_t handle = curdispatcher();

    if (((uintptr_t)handle) == ((uintptr_t)NULL)) {
        // FIXME: should return TRACE_ERR_NOT_VALID_HANDLE
        return TRACE_ERR_NO_BUFFER;
    }

    struct dispatcher_generic *disp = get_dispatcher_generic(handle);

    if (disp == NULL) {
        // FIXME: should return TRACE_ERR_NOT_VALID_DISP
        return TRACE_ERR_NO_BUFFER;
    }

    struct trace_buffer *trace_buf = disp->trace_buf;

    if (trace_buf == NULL) {
        return TRACE_ERR_NO_BUFFER;
    }

    struct trace_buffer *master = (struct trace_buffer*)trace_buffer_master;
    //struct trace_buffer *master = trace_buf->master;
    if (master == NULL) {
        return TRACE_ERR_NO_BUFFER;
    }

    if (!master->running) {
        if (ev->u.raw == master->start_trigger) {
            master->start_trigger = 0;
            master->t0 = ev->timestamp;
            if (master->duration != 0) {
                master->stop_time = master->t0 + master->duration;
            }
            master->running = true;

            // Make sure the trigger event is first in the buffer
            (void) trace_reserve_and_fill_slot(ev, trace_buf);
            return SYS_ERR_OK;

        } else {
            return SYS_ERR_OK;
        }
    }
    (void) trace_reserve_and_fill_slot(ev, trace_buf);

    if (ev->u.raw == master->stop_trigger ||
            ev->timestamp > master->stop_time) {
        master->stop_trigger = 0;
        master->running = false;
    }

#endif // TRACING_EXISTS

    return SYS_ERR_OK;
}



#endif // !IN_KERNEL



/**
 * \brief Convenience wrapper to write a trace event.
 */
static inline errval_t trace_event_raw(uint64_t raw)
{
#ifdef CONFIG_TRACE

#if TRACE_ONLY_SUB_NET
    /* we do not want the stats about actual messages sent */
//    return SYS_ERR_OK;
#endif // TRACE_ONLY_SUB_NET


    struct trace_event ev;
    ev.timestamp = TRACE_TIMESTAMP();
    ev.u.raw = raw;
    return trace_write_event(&ev);
#else
    return SYS_ERR_OK;
#endif
}
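
/*
 * Illustrative call (a sketch; the BENCH constants come from
 * trace_definitions/trace_defs.h):
 *
 *   trace_event_raw(TRACE_EVENT(TRACE_SUBSYS_BENCH, TRACE_EVENT_BENCH_START, 0));
 *
 * On a little-endian target this produces the same packed event as
 * TRACE(BENCH, START, 0), but it bypasses the per-subsystem enabled check and
 * the conditional-termination hook that trace_event() performs.
 */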

#ifdef TRACING_EXISTS
#include <stdio.h>
/// Is the subsystem enabled, i.e. should we log events for it?
static inline bool trace_is_subsys_enabled(uint16_t subsys)
{
#ifdef CONFIG_TRACE
    assert(subsys < TRACE_NUM_SUBSYSTEMS);

    uint8_t* base_pointer;
#ifdef IN_KERNEL
    base_pointer = (uint8_t*) kernel_trace_buf;
#else // !IN_KERNEL
    base_pointer = (uint8_t*) trace_buffer_master;
#endif // !IN_KERNEL

    if (base_pointer == NULL) {
        // The trace buffer is not even mapped.
        return false;
    }

    bool* subsystem_states = (bool*) (base_pointer + TRACE_BUF_SIZE);

    return subsystem_states[subsys];
#else // !CONFIG_TRACE
    return false;
#endif // !CONFIG_TRACE
}
#endif // TRACING_EXISTS




static inline errval_t trace_event(uint16_t subsys, uint16_t event, uint32_t arg)
{
#ifdef CONFIG_TRACE

    // Check whether the subsystem is enabled, i.e. whether we log events for it
    if (!trace_is_subsys_enabled(subsys)) {
        return TRACE_ERR_SUBSYS_DISABLED;
    }

    struct trace_event ev;
    ev.timestamp = TRACE_TIMESTAMP();
    ev.u.ev.subsystem = subsys;
    ev.u.ev.event     = event;
    ev.u.ev.arg       = arg;


    if (cond_termination != NULL) {
        cond_termination(false);
    }

    return trace_write_event(&ev);
#else
    return SYS_ERR_OK;
#endif
}

#endif // LIBBARRELFISH_TRACE_H