/*
 * Copyright 2017, Data61
 * Commonwealth Scientific and Industrial Research Organisation (CSIRO)
 * ABN 41 687 119 230.
 *
 * This software may be distributed and modified according to the terms of
 * the BSD 2-Clause license. Note that NO WARRANTY is provided.
 * See "LICENSE_BSD2.txt" for details.
 *
 * @TAG(DATA61_BSD)
 */
#pragma once

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <sel4/sel4.h>
#include <sel4bench/types.h>
#include <sel4bench/arch/sel4bench.h>
#include <utils/attribute.h>
#include <utils/arith.h>

/**
 * @file
 *
 * libsel4bench is a library designed to abstract over the performance
 * monitoring counters (PMCs) in modern IA-32 and ARM processors, so that you
 * can measure the performance of your software.  It will also work out whether
 * certain operations need to be done in kernel mode, and perform kernel code
 * injection calls to make them happen.  As a result, expect that any library
 * call could potentially result in a syscall.  (This is of particular note on
 * the KZM/ARM1136, for which even reading the cycle counter must be done in
 * kernel mode.)
 *
 * It also goes out of its way to ensure that there's always a cycle counter
 * available for use.  `sel4bench_init()` will start this running, and
 * `sel4bench_destroy()` will tear it down, if necessary.
 *
 * Notes:
 * - Overflow is completely ignored, even on processors that only support
 *   32-bit counters (and thus overflow that much sooner).  If you are doing
 *   something that might overflow a counter, it's up to you to deal with that
 *   possibility.
 * - Everything is zero-indexed.
 */
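
/*
 * Typical usage (an illustrative sketch; `run_workload()` is a hypothetical
 * function standing in for the code being measured):
 *
 *     sel4bench_init();
 *     ccnt_t start = sel4bench_get_cycle_count();
 *     run_workload();
 *     ccnt_t end = sel4bench_get_cycle_count();
 *     ccnt_t elapsed = end - start;
 *     sel4bench_destroy();
 */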

/*
 * CPP constants for events that are common to all architecture variants.
 *
 * Additional events are architecture- (and potentially processor-) specific.
 * These may be defined in architecture or processor header files.
 */
static UNUSED event_id_t GENERIC_EVENTS[] = {
    SEL4BENCH_EVENT_CACHE_L1I_MISS,
    SEL4BENCH_EVENT_CACHE_L1D_MISS,
    SEL4BENCH_EVENT_TLB_L1I_MISS,
    SEL4BENCH_EVENT_TLB_L1D_MISS,
    SEL4BENCH_EVENT_EXECUTE_INSTRUCTION,
    SEL4BENCH_EVENT_BRANCH_MISPREDICT,
    SEL4BENCH_EVENT_MEMORY_ACCESS,
};

static UNUSED char *GENERIC_EVENT_NAMES[] = {
    "L1 i-cache misses",
    "L1 d-cache misses",
    "L1 i-tlb misses",
    "L1 d-tlb misses",
    "Instructions",
    "Branch mispredictions",
    "Memory accesses",
};

static_assert(ARRAY_SIZE(GENERIC_EVENTS) == ARRAY_SIZE(GENERIC_EVENT_NAMES),
              "event names same length as counters");

/* Number of generic events */
#define SEL4BENCH_NUM_GENERIC_EVENTS ARRAY_SIZE(GENERIC_EVENTS)

/**
 * Initialise the sel4bench library.  Nothing else is guaranteed to work, and
 * may produce strange failures, if you don't do this first.
 *
 * Starts the cycle counter, which is guaranteed to run until
 * `sel4bench_destroy()` is called.
 */
static UNUSED void sel4bench_init();

/**
 * Tear down the sel4bench library.  Nothing else is guaranteed to work, and may
 * produce strange failures, after you do this.
 */
static UNUSED void sel4bench_destroy();

/**
 * Query the cycle counter.  If said counter needs starting, `sel4bench_init()`
 * will have taken care of it.
 *
 * The returned cycle count might be since `sel4bench_init()`, if the cycle
 * counter needs explicit starting, or since bootup, if it freewheels.
 *
 * @return current cycle count
 */
static UNUSED ccnt_t sel4bench_get_cycle_count();

/**
 * Query how many performance counters are supported on this CPU, excluding the
 * cycle counter.
 *
 * Note that the return value is of type `seL4_Word`; consequently, this library
 * supports a number of counters less than or equal to the machine word size in
 * bits.
 *
 * @return quantity of counters on this CPU
 */
static UNUSED seL4_Word sel4bench_get_num_counters();

/**
 * Query the description of a counter.
 *
 * @param counter counter to query
 *
 * @return ASCII string representation of counter's description; `NULL` if
 * counter does not exist
 */
const char *sel4bench_get_counter_description(counter_t counter);

/**
 * Query the value of a counter.
 *
 * @param counter counter to query
 *
 * @return counter value
 */
static UNUSED ccnt_t sel4bench_get_counter(counter_t counter);

/**
 * Query the value of a set of counters.
 *
 * `values` must point to an array whose length is at least one greater than
 * the highest counter index to be read (i.e., up to
 * `sel4bench_get_num_counters()` entries).  Each counter that is read is
 * written to its corresponding index in this array.
 *
 * @param counters bitfield indicating which counter(s) in `values` to query
 * @param values   array of counters
 *
 * @return current cycle count as in `sel4bench_get_cycle_count()`
 */
static UNUSED ccnt_t sel4bench_get_counters(counter_bitfield_t counters,
                                            ccnt_t *values);
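
/*
 * Example (an illustrative sketch): read counters 0 and 2 after they have been
 * configured and started.  The counter indices and the `vals` array are
 * hypothetical; the CPU is assumed to have at least three counters.
 *
 *     ccnt_t vals[3] = { 0 };
 *     counter_bitfield_t which = BIT(0) | BIT(2);
 *     ccnt_t cycles = sel4bench_get_counters(which, vals);
 *     // vals[0] and vals[2] now hold the two counter values.
 */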

/**
 * Assign a counter to track a specific event.  Events are processor-specific,
 * though some common ones might be exposed through preprocessor constants.
 *
 * @param counter counter to configure
 * @param event   event to track
 */
static UNUSED void sel4bench_set_count_event(counter_t counter,
                                             event_id_t event);

/**
 * Start counting events on a set of performance counters.
 *
 * @param counters bitfield indicating which counter(s) to start
 */
static UNUSED void sel4bench_start_counters(counter_bitfield_t counters);

/**
 * Stop counting events on a set of performance counters.
 *
 * Note: Some processors (notably, the KZM/ARM1136) may not support this
 * operation.
 *
 * @param counters bitfield indicating which counter(s) to stop
 */
static UNUSED void sel4bench_stop_counters(counter_bitfield_t counters);
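
/*
 * Example (an illustrative sketch): track L1 d-cache misses on counter 0
 * around a hypothetical `run_workload()` function.
 *
 *     sel4bench_set_count_event(0, SEL4BENCH_EVENT_CACHE_L1D_MISS);
 *     sel4bench_reset_counters();
 *     sel4bench_start_counters(BIT(0));
 *     run_workload();
 *     sel4bench_stop_counters(BIT(0));
 *     ccnt_t misses = sel4bench_get_counter(0);
 */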

/**
 * Reset all performance counters to zero.  Note that the cycle counter is not a
 * performance counter, and is not reset.
 */
static UNUSED void sel4bench_reset_counters(void);

/**
 * Query the number of benchmark loops required to read a given number of
 * events.
 *
 * @param n_counters number of counters available
 * @param n_events   number of events of interest
 *
 * @return number of benchmark loops required
 */
static inline int sel4bench_get_num_counter_chunks(seL4_Word n_counters,
                                                   seL4_Word n_events)
{
    return DIV_ROUND_UP(n_events, n_counters);
}
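
/*
 * For example, tracking 10 events on a CPU with 4 performance counters
 * requires DIV_ROUND_UP(10, 4) == 3 chunks, and hence 3 benchmark loops.
 */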

/**
 * Enable a chunk of the event counters passed in.
 *
 * A "chunk" is a quantity of events not larger than the number of performance
 * counters available.  Because we can be interested in more events than there
 * are counters, the events are broken into numbered chunks (zero-indexed).  The
 * quantity of chunks is ceil(n_events / n_counters).
 *
 * Imagine we had 10 events to track but `n_counters` was only 8 (as if the
 * machine word were only 8 bits wide).
 *
 *     +--chunk 1-+--chunk 0-+
 *     | xxxxxxxx | xxxxxxxx |
 *     +---------------------+
 *
 * sel4bench_enable_counters(10, events, 0, 8) would return 255:
 *
 *     +--chunk 1-+--chunk 0-+
 *     | 00000000 | 11111111 |
 *     +---------------------+
 *
 * sel4bench_enable_counters(10, events, 1, 8) would return 3:
 *
 *     +--chunk 1-+--chunk 0-+
 *     | 00000011 | 00000000 |
 *     +---------------------+
 *
 * `n_counters` is a parameter because calling `sel4bench_get_num_counters()`
 * can be expensive, but it should be the same as the value that function
 * returns.
 *
 * @param n_events   number of events of interest
 * @param events     events to track
 * @param chunk      chunk number to enable
 * @param n_counters number of counters available
 *
 * @return mask usable to manipulate the counters enabled
 */
static inline
counter_bitfield_t sel4bench_enable_counters(seL4_Word n_events,
                                             event_id_t *events,
                                             seL4_Word chunk,
                                             seL4_Word n_counters)
{
    assert(chunk < sel4bench_get_num_counter_chunks(n_counters, n_events));
    assert(n_counters == sel4bench_get_num_counters());
    counter_bitfield_t mask = 0;

    for (seL4_Word i = 0; i < n_counters; i++) {
        seL4_Word counter = chunk * n_counters + i;
        if (counter >= n_events) {
            break;
        }
        sel4bench_set_count_event(i, events[counter]);
        mask |= BIT(i);
    }

    sel4bench_reset_counters();
    sel4bench_start_counters(mask);
    return mask;
}

/**
 * Read and stop the counters set in `mask`.
 *
 * `n_counters` is a parameter because calling `sel4bench_get_num_counters()`
 * can be expensive, but it should be the same as the value that function
 * returns.
 *
 * `results` must point to an array of size `n_events`, as passed to
 * `sel4bench_enable_counters()`.
 *
 * @param mask       as returned by `sel4bench_enable_counters()`
 * @param chunk      as passed to `sel4bench_enable_counters()`
 * @param n_counters number of counters available
 * @param results    array of counter results
 */
static inline void sel4bench_read_and_stop_counters(counter_bitfield_t mask,
                                                    seL4_Word chunk,
                                                    seL4_Word n_counters,
                                                    ccnt_t results[])
{
    sel4bench_get_counters(mask, &results[chunk * n_counters]);
    sel4bench_stop_counters(mask);
}
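
/*
 * Example (an illustrative sketch): collect `n_events` events listed in a
 * hypothetical `events` array around a hypothetical `run_workload()` function,
 * one chunk per benchmark loop.  `results` must hold `n_events` entries, and
 * running the workload once per chunk is assumed to be acceptable.
 *
 *     seL4_Word n_counters = sel4bench_get_num_counters();
 *     for (seL4_Word chunk = 0;
 *          chunk < sel4bench_get_num_counter_chunks(n_counters, n_events);
 *          chunk++) {
 *         counter_bitfield_t mask =
 *             sel4bench_enable_counters(n_events, events, chunk, n_counters);
 *         run_workload();
 *         sel4bench_read_and_stop_counters(mask, chunk, n_counters, results);
 *     }
 */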

/**
 * Call `sel4bench_enable_counters()` on the `GENERIC_EVENTS` supplied for all
 * platforms by this library.
 *
 * See `sel4bench_enable_counters()` for parameters and return value.
 */
static inline counter_bitfield_t sel4bench_enable_generic_counters(
    seL4_Word chunk, seL4_Word n_counters)
{
    return sel4bench_enable_counters(SEL4BENCH_NUM_GENERIC_EVENTS,
                                     GENERIC_EVENTS, chunk, n_counters);
}

/**
 * Call `sel4bench_get_num_counter_chunks()` for the `GENERIC_EVENTS` supplied
 * for all platforms by this library.
 *
 * See `sel4bench_get_num_counter_chunks()` for parameters and return value.
 */
static inline int sel4bench_get_num_generic_counter_chunks(seL4_Word n_counters)
{
    return sel4bench_get_num_counter_chunks(n_counters,
                                            SEL4BENCH_NUM_GENERIC_EVENTS);
}
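
/*
 * Example (an illustrative sketch): the same chunked loop as above, but over
 * the library-provided `GENERIC_EVENTS`, reporting each result against its
 * entry in `GENERIC_EVENT_NAMES`.  `run_workload()` is hypothetical, and the
 * printf loop (which would need <stdio.h>) is purely illustrative.
 *
 *     seL4_Word n_counters = sel4bench_get_num_counters();
 *     ccnt_t results[SEL4BENCH_NUM_GENERIC_EVENTS] = { 0 };
 *     int n_chunks = sel4bench_get_num_generic_counter_chunks(n_counters);
 *     for (int chunk = 0; chunk < n_chunks; chunk++) {
 *         counter_bitfield_t mask =
 *             sel4bench_enable_generic_counters(chunk, n_counters);
 *         run_workload();
 *         sel4bench_read_and_stop_counters(mask, chunk, n_counters, results);
 *     }
 *     for (size_t i = 0; i < SEL4BENCH_NUM_GENERIC_EVENTS; i++) {
 *         printf("%s: %llu\n", GENERIC_EVENT_NAMES[i],
 *                (unsigned long long)results[i]);
 *     }
 */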