/**
 * \file
 * \brief User and kernel code definitions for system-wide tracing
 */

/*
 * Copyright (c) 2007-2010, ETH Zurich.
 * Copyright (c) 2015, Hewlett Packard Enterprise Development LP.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
 */

#ifndef LIBBARRELFISH_TRACE_H
#define LIBBARRELFISH_TRACE_H

#if defined(__x86_64__)
#define TRACING_EXISTS 1
#endif

#ifndef IN_KERNEL
/* XXX: private includes from libbarrelfish */
#include <barrelfish/dispatcher_arch.h>
#include <barrelfish/curdispatcher_arch.h>
#else // IN_KERNEL
#if defined(__x86_64__) || defined(__i386__)
#include <arch/x86/apic.h> // XXX!
#endif // __x86_64__ || __i386__
#endif // IN_KERNEL

#include <barrelfish/sys_debug.h>
#include <barrelfish/waitset.h> // struct event_closure
#include <barrelfish/domain.h>

#include <stdio.h>
#include <string.h> // memcpy

/*
 * Turn some tracing on or off.
 */
#define TRACE_THREADS
#define TRACE_CSWITCH
//#define NETWORK_LLSTACK_TRACE 1

/* Trace only network-related events.
 * This reduces the number of events recorded, and hence allows
 * recording for a longer time. */
#if CONFIG_TRACE && NETWORK_STACK_TRACE
#define TRACE_ONLY_SUB_NET 1
#endif // CONFIG_TRACE && NETWORK_STACK_TRACE

#if CONFIG_TRACE && NETWORK_LLSTACK_TRACE
#define TRACE_ONLY_LLNET 1
#endif // CONFIG_TRACE && NETWORK_LLSTACK_TRACE

#define CONSOLE_DUMP_BUFLEN (2<<20)

/**
 * The constants for the subsystems and events are generated from
 * trace_definitions/trace_defs.pleco; after compilation they are
 * available via #include <trace_definitions/trace_defs.h>.
 */
#include <trace_definitions/trace_defs.h>

#define TRACE_EVENT(s,e,a) ((uint64_t)(s)<<48|(uint64_t)(e)<<32|(a))
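
/*
 * Illustrative sketch (not part of the original API documentation):
 * TRACE_EVENT packs subsystem, event, and argument into one raw 64-bit
 * word, with the subsystem in bits 63..48, the event in bits 47..32 and
 * the argument in bits 31..0. For example, using the generated BENCH
 * constants:
 *
 *   uint64_t raw = TRACE_EVENT(TRACE_SUBSYS_BENCH,
 *                              TRACE_EVENT_BENCH_START, 0);
 *   trace_event_raw(raw); // see trace_event_raw() below
 */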

/* XXX: this is a temp kludge. The tracing code wants to allocate a fixed buffer
 * for every possible core ID, but this is now too large for sanity, so I've
 * limited it here. -AB 20111229
 */

struct trace_buffer;

#define TRACE_COREID_LIMIT        32
#define TRACE_EVENT_SIZE          16
#define TRACE_MAX_EVENTS          20000        // max number of events
#define TRACE_MAX_APPLICATIONS    128
//#define TRACE_PERCORE_BUF_SIZE    0x1ff00
#define TRACE_PERCORE_BUF_SIZE    (TRACE_EVENT_SIZE * TRACE_MAX_EVENTS + (sizeof (struct trace_buffer)))

#define TRACE_BUF_SIZE (TRACE_COREID_LIMIT*TRACE_PERCORE_BUF_SIZE)    // Size of all trace buffers

// Size of the array storing which subsystems are enabled
#define TRACE_SUBSYS_ENABLED_BUF_SIZE (TRACE_NUM_SUBSYSTEMS * sizeof(bool))

#define TRACE_ALLOC_SIZE (TRACE_BUF_SIZE + TRACE_SUBSYS_ENABLED_BUF_SIZE)
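
/*
 * Layout sketch (derived from the macros above): the tracing region is a
 * single allocation of TRACE_ALLOC_SIZE bytes,
 *
 *   [ TRACE_COREID_LIMIT per-core buffers, TRACE_PERCORE_BUF_SIZE each ]
 *   [ bool subsys_enabled[TRACE_NUM_SUBSYSTEMS]                        ]
 *
 * trace_is_subsys_enabled() below indexes the trailing bool array at
 * offset TRACE_BUF_SIZE.
 */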

#define TRACE_MAX_BOOT_APPLICATIONS 16

// A macro to simplify calling of trace_event,
// e.g., TRACE(BENCH, START, 0)
// instead of
// trace_event(TRACE_SUBSYS_BENCH, TRACE_EVENT_BENCH_START, 0)
#define TRACE(s, e, a) trace_event(TRACE_SUBSYS_##s, TRACE_EVENT_##s##_##e, a)

#if defined(__x86_64__)
// for rdtsc()
#include <arch/x86/barrelfish_kpi/asm_inlines_arch.h>

#define TRACE_TIMESTAMP() rdtsc()

/*
 * \brief Compare-and-set: if the value at address equals old, set it to
 *        nw and return true; otherwise do not write to address and
 *        return false.
 *
 * NOTE: This is only used by threads on the same core, so no lock prefix.
 */
static inline bool trace_cas(volatile uintptr_t *address, uintptr_t old,
                             uintptr_t nw)
{
    register bool res;
    __asm volatile("cmpxchgq %2,%0     \n\t"
                   "setz %1            \n\t"
                   : "+m" (*address), "=q" (res)
                   : "r" (nw), "a" (old)
                   : "memory");
    return res;
}
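
/*
 * Usage sketch (illustrative, not part of the original file): advance an
 * index only if no other thread moved it first.
 *
 *   volatile uintptr_t idx = 0;
 *   if (trace_cas(&idx, 0, 1)) {
 *       // we won the race; idx is now 1
 *   }
 */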

#elif defined(__i386__) || defined(__arm__) || defined(__aarch64__)

static inline bool trace_cas(volatile uintptr_t *address, uintptr_t old,
                             uintptr_t nw)
{
    return false;
}

#define TRACE_TIMESTAMP() 0

#else

#warning You need to supply CAS and a timestamp function for this architecture.

#endif


/// Trace event
struct trace_event {
    uint64_t timestamp;
    union {
        uint64_t raw;
        // ... stuff that is embedded in the event
        struct {
            uint32_t word1;
            uint32_t word2;
        } w32;
        struct {
            uint32_t arg;
            uint16_t event;
            uint16_t subsystem;
        } ev;
    } u;
};
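
/*
 * Decoding sketch (illustrative; assumes the little-endian layout that
 * TRACE_EVENT() above also relies on):
 *
 *   struct trace_event ev;
 *   ev.u.raw = TRACE_EVENT(TRACE_SUBSYS_BENCH, TRACE_EVENT_BENCH_START, 42);
 *   // now ev.u.ev.subsystem == TRACE_SUBSYS_BENCH,
 *   //     ev.u.ev.event     == TRACE_EVENT_BENCH_START,
 *   //     ev.u.ev.arg       == 42
 */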

// Trace information about an application
struct trace_application {
    char name[8]; ///< Name of the application
    uint64_t dcb; ///< DCB address of the application
};

/// Trace buffer
struct trace_buffer {
    volatile uintptr_t head_index;
    volatile uintptr_t tail_index;

    // ... flags ...
    struct trace_buffer *master;     // Pointer to the trace master
    volatile bool     running;
    volatile bool     autoflush;     // Are we flushing automatically?
    volatile uint64_t start_trigger;
    volatile uint64_t stop_trigger;
    volatile uint64_t stop_time;
    int64_t           t_offset;      // Time offset relative to core 0
    uint64_t          t0;            // Start time of trace
    uint64_t          duration;      // Max trace duration
    uint64_t          event_counter; // Max number of events in trace

    // ... events ...
    struct trace_event events[TRACE_MAX_EVENTS];

    // ... applications ...
    volatile uint8_t num_applications;
    struct trace_application applications[TRACE_MAX_APPLICATIONS];
};

typedef errval_t (* trace_conditional_termination_t)(bool forced);

static __attribute__((unused)) trace_conditional_termination_t
    cond_termination = NULL;

#ifndef IN_KERNEL

extern lvaddr_t trace_buffer_master;
extern lvaddr_t trace_buffer_va;
struct cnoderef;

errval_t trace_init(void);
errval_t trace_disable_domain(void);
void trace_reset_buffer(void);
void trace_reset_all(void);
errval_t trace_setup_on_core(struct capref *retcap);
errval_t trace_setup_child(struct cnoderef taskcn,
                           dispatcher_handle_t handle);
errval_t trace_control(uint64_t start_trigger,
                       uint64_t stop_trigger,
                       uint64_t duration);
errval_t trace_control_fixed_events_counter(uint64_t start_trigger,
                                            uint64_t stop_trigger,
                                            uint64_t duration,
                                            uint64_t event_counter);
errval_t trace_wait(void);
size_t trace_get_event_count(coreid_t specified_core);
errval_t trace_conditional_termination(bool forced);
size_t trace_dump(char *buf, size_t buflen, int *number_of_events);
size_t trace_dump_core(char *buf, size_t buflen, size_t *usedBytes,
                       int *number_of_events_dumped, coreid_t specified_core,
                       bool first_dump, bool isOnlyOne);
void trace_flush(struct event_closure callback);
void trace_set_autoflush(bool enabled);
errval_t trace_prepare(struct event_closure callback);
errval_t trace_my_setup(void);

errval_t trace_set_subsys_enabled(uint16_t subsys, bool enabled);
errval_t trace_set_all_subsys_enabled(bool enabled);
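
/*
 * Typical user-space flow (an illustrative sketch, not prescribed by this
 * header; it assumes a BENCH STOP event is defined in trace_defs.pleco
 * alongside the BENCH START event used elsewhere in this file):
 *
 *   errval_t err = trace_control(
 *       TRACE_EVENT(TRACE_SUBSYS_BENCH, TRACE_EVENT_BENCH_START, 0),
 *       TRACE_EVENT(TRACE_SUBSYS_BENCH, TRACE_EVENT_BENCH_STOP, 0),
 *       0); // 0: no fixed duration
 *   TRACE(BENCH, START, 0);
 *   // ... code under test, emitting further TRACE() events ...
 *   TRACE(BENCH, STOP, 0);
 *
 *   int n;
 *   char *buf = malloc(CONSOLE_DUMP_BUFLEN);
 *   trace_dump(buf, CONSOLE_DUMP_BUFLEN, &n); // retrieve the recorded events
 */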

/**
 * \brief Compute the fixed trace buffer address for a given core ID.
 *
 * Each core gets its own top-level page table entry, so to use a fixed
 * address we need to compute it as an offset from the core ID.
 *
 * Once we have computed this for a domain, we store it in the dispatcher.
 */
static inline lvaddr_t compute_trace_buf_addr(uint8_t core_id)
{
    assert(core_id < TRACE_COREID_LIMIT);
    lvaddr_t addr = trace_buffer_master + (core_id * TRACE_PERCORE_BUF_SIZE);

    return addr;
}
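
/*
 * Usage sketch (illustrative): locate this core's buffer once the master
 * mapping is in place.
 *
 *   struct trace_buffer *buf =
 *       (struct trace_buffer *)compute_trace_buf_addr(get_my_core_id());
 */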

static inline void set_cond_termination(trace_conditional_termination_t f_ptr)
{
    cond_termination = f_ptr;
}

#endif // NOT IN_KERNEL

void trace_init_disp(void);

/**
 * \brief Reserve a slot in the trace buffer and write the event.
 *
 * Lock-free implementation; returns the index of the slot that was
 * written.
 */
static inline uintptr_t
trace_reserve_and_fill_slot(struct trace_event *ev,
                            struct trace_buffer *buf)
{
    uintptr_t i, nw;
    struct trace_event *slot;

    do {
        i = buf->head_index;

        if (buf->tail_index - buf->head_index == 1 ||
                (buf->tail_index == 0 && (buf->head_index == TRACE_MAX_EVENTS-1))) {
            // Buffer is full, overwrite the last event
            break;
        }

        nw = (i + 1) % TRACE_MAX_EVENTS;

    } while (!trace_cas(&buf->head_index, i, nw));

    // Write the event
    slot = &buf->events[i];
    *slot = *ev;

    return i;
}

/**
 * \brief Write a trace event to the buffer for the current core.
 *
 * Tracing must have been set up by the parent of the current domain
 * (by calling trace_setup_child).
 *
 * The per-core buffer must already have been initialized by the monitor
 * (by calling trace_setup_on_core).
 */

#ifndef IN_KERNEL

static inline coreid_t get_my_core_id(void)
{
    // FIXME: This call is not safe; figure out a better way to do this.
    // WARNING: Be very careful about using get_my_core_id, as it depends
    // on the dispatcher pointers, which do not work in thread-disabled
    // mode when you are about to return to the kernel with sys_yield.
    return disp_get_core_id();
}
#endif // !IN_KERNEL

#ifdef IN_KERNEL

static inline coreid_t get_my_core_id(void)
{
    return my_core_id;
}

// Kernel version: uses the global trace buffer variable
static inline errval_t trace_write_event(struct trace_event *ev)
{
#ifdef TRACING_EXISTS
    struct trace_buffer *master = (struct trace_buffer *)kernel_trace_buf;

    if (kernel_trace_buf == 0 || my_core_id >= TRACE_COREID_LIMIT) {
        return TRACE_ERR_NO_BUFFER;
    }

    if (!master->running) {
        if (ev->u.raw == master->start_trigger) {
            master->start_trigger = 0;
            master->t0 = ev->timestamp;
            if (master->duration) {
                master->stop_time = ev->timestamp + master->duration;
            }
            master->running = true;
        } else {
            return SYS_ERR_OK;
        }
    }
    struct trace_buffer *trace_buf = (struct trace_buffer *)(kernel_trace_buf
            + my_core_id * TRACE_PERCORE_BUF_SIZE);
    (void) trace_reserve_and_fill_slot(ev, trace_buf);

    if (ev->u.raw == master->stop_trigger ||
            (ev->timestamp>>63 == 0 &&  // Not a DCB event
             ev->timestamp > master->stop_time)) {
        master->stop_trigger = 0;
        master->running = false;
    }
#endif // TRACING_EXISTS
    return SYS_ERR_OK;
}

// Call this function when a new application has been created.
// dcb: pointer to the domain control block struct of the application.
static inline errval_t trace_new_application(char *new_application_name, uintptr_t dcb)
{
#ifdef TRACING_EXISTS

    if (kernel_trace_buf == 0 || my_core_id >= TRACE_COREID_LIMIT) {
        return TRACE_ERR_NO_BUFFER;
    }

    struct trace_buffer *trace_buf = (struct trace_buffer *)(kernel_trace_buf
            + my_core_id * TRACE_PERCORE_BUF_SIZE);

    // Atomically reserve the next application slot.
    // XXX: trace_cas compares a full machine word, so this relies on the
    // padding bytes after the uint8_t num_applications staying zero.
    int i;
    int new_value;
    do {
        i = trace_buf->num_applications;

        if (i == TRACE_MAX_APPLICATIONS) {
            return -1; // TODO: error code
        }

        new_value = i + 1;

    } while (!trace_cas((uintptr_t *)&trace_buf->num_applications, i, new_value));

    trace_buf->applications[i].dcb = (uint64_t) dcb;
    memcpy(&trace_buf->applications[i].name, new_application_name, 8);

#endif // TRACING_EXISTS
    return SYS_ERR_OK;
}

// During boot of a core the trace buffer is not yet mapped, but we still
// want to record the applications started at this time. This would be
// unnecessary if the kernel were responsible for mapping the trace buffer,
// but currently that is the monitor's job.

extern struct trace_application kernel_trace_boot_applications[];
extern int kernel_trace_num_boot_applications;

static inline void trace_new_boot_application(char *name, uintptr_t dcb)
{
#if defined(TRACING_EXISTS)
    if (kernel_trace_num_boot_applications < TRACE_MAX_BOOT_APPLICATIONS) {

        kernel_trace_boot_applications[kernel_trace_num_boot_applications].dcb = (uint64_t) dcb;
        memcpy(kernel_trace_boot_applications[kernel_trace_num_boot_applications].name, name, 8);

        kernel_trace_num_boot_applications++;
    }
#endif
}

static inline void trace_copy_boot_applications(void)
{
#if defined(TRACING_EXISTS)
    for (int i = 0; i < kernel_trace_num_boot_applications; i++) {
        trace_new_application(kernel_trace_boot_applications[i].name,
                              kernel_trace_boot_applications[i].dcb);
    }
#endif
}
#else // !IN_KERNEL

// User-space version: gets the trace buffer pointer out of the current
// dispatcher.
static inline errval_t trace_write_event(struct trace_event *ev)
{
#ifdef TRACING_EXISTS
    dispatcher_handle_t handle = curdispatcher();

    if (((uintptr_t)handle) == ((uintptr_t)NULL)) {
        // FIXME: should return TRACE_ERR_NOT_VALID_HANDLE
        return TRACE_ERR_NO_BUFFER;
    }

    struct dispatcher_generic *disp = get_dispatcher_generic(handle);

    if (disp == NULL) {
        // FIXME: should return TRACE_ERR_NOT_VALID_DISP
        return TRACE_ERR_NO_BUFFER;
    }

    struct trace_buffer *trace_buf = disp->trace_buf;

    if (trace_buf == NULL) {
        return TRACE_ERR_NO_BUFFER;
    }

    struct trace_buffer *master = (struct trace_buffer *)trace_buffer_master;
    //struct trace_buffer *master = trace_buf->master;
    if (master == NULL) {
        return TRACE_ERR_NO_BUFFER;
    }

    if (!master->running) {
        if (ev->u.raw == master->start_trigger) {
            master->start_trigger = 0;
            master->t0 = ev->timestamp;
            if (master->duration != 0) {
                master->stop_time = master->t0 + master->duration;
            }
            master->running = true;

            // Make sure the trigger event is first in the buffer
            (void) trace_reserve_and_fill_slot(ev, trace_buf);
            return SYS_ERR_OK;

        } else {
            return SYS_ERR_OK;
        }
    }
    (void) trace_reserve_and_fill_slot(ev, trace_buf);

    if (ev->u.raw == master->stop_trigger ||
            ev->timestamp > master->stop_time) {
        master->stop_trigger = 0;
        master->running = false;
    }

#endif // TRACING_EXISTS

    return SYS_ERR_OK;
}

#endif // !IN_KERNEL

/**
 * \brief Convenience wrapper to write a trace event.
 */
static inline errval_t trace_event_raw(uint64_t raw)
{
#ifdef CONFIG_TRACE

#if TRACE_ONLY_SUB_NET
    /* we do not want the stats about the actual messages sent */
//    return SYS_ERR_OK;
#endif // TRACE_ONLY_SUB_NET

    struct trace_event ev;
    ev.timestamp = TRACE_TIMESTAMP();
    ev.u.raw = raw;
    return trace_write_event(&ev);
#else
    return SYS_ERR_OK;
#endif
}
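
/*
 * Usage sketch (illustrative): emit a raw 64-bit event, e.g. a trigger
 * word built with TRACE_EVENT() above.
 *
 *   trace_event_raw(TRACE_EVENT(TRACE_SUBSYS_BENCH,
 *                               TRACE_EVENT_BENCH_START, 0));
 */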

#ifdef TRACING_EXISTS
/// Is the subsystem enabled, i.e., should we log events for it?
static inline bool trace_is_subsys_enabled(uint16_t subsys)
{
#ifdef CONFIG_TRACE
    assert(subsys < TRACE_NUM_SUBSYSTEMS);

    uint8_t *base_pointer;
#ifdef IN_KERNEL
    base_pointer = (uint8_t *) kernel_trace_buf;
#else // !IN_KERNEL
    base_pointer = (uint8_t *) trace_buffer_master;
#endif // !IN_KERNEL

    if (base_pointer == NULL) {
        // The trace buffer is not even mapped.
        return false;
    }

    bool *subsystem_states = (bool *) (base_pointer + TRACE_BUF_SIZE);

    return subsystem_states[subsys];
#else // !CONFIG_TRACE
    return false;
#endif // !CONFIG_TRACE
}
#endif // TRACING_EXISTS

static inline errval_t trace_event(uint16_t subsys, uint16_t event, uint32_t arg)
{
#ifdef CONFIG_TRACE

    // Check whether the subsystem is enabled, i.e., whether we log events for it
    if (!trace_is_subsys_enabled(subsys)) {
        return TRACE_ERR_SUBSYS_DISABLED;
    }

    struct trace_event ev;
    ev.timestamp = TRACE_TIMESTAMP();
    ev.u.ev.subsystem = subsys;
    ev.u.ev.event     = event;
    ev.u.ev.arg       = arg;

    if (cond_termination != NULL) {
        cond_termination(false);
    }

    return trace_write_event(&ev);
#else
    return SYS_ERR_OK;
#endif
}
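
/*
 * Usage sketch: the TRACE() macro defined above is the intended entry
 * point, e.g.
 *
 *   TRACE(BENCH, START, 0);
 *   // expands to:
 *   // trace_event(TRACE_SUBSYS_BENCH, TRACE_EVENT_BENCH_START, 0);
 */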

#endif // LIBBARRELFISH_TRACE_H