1/* 2 * Copyright 2017, Data61 3 * Commonwealth Scientific and Industrial Research Organisation (CSIRO) 4 * ABN 41 687 119 230. 5 * 6 * This software may be distributed and modified according to the terms of 7 * the BSD 2-Clause license. Note that NO WARRANTY is provided. 8 * See "LICENSE_BSD2.txt" for details. 9 * 10 * @TAG(DATA61_BSD) 11 */ 12#pragma once 13 14#include <stddef.h> 15#include <stdint.h> 16 17#include <sel4/sel4.h> 18#include <sel4bench/types.h> 19#include <sel4bench/arch/sel4bench.h> 20#include <utils/attribute.h> 21#include <utils/arith.h> 22 23/** 24 * @file 25 * 26 * libsel4bench is a library designed to abstract over the performance 27 * monitoring counters (PMCs) in modern IA-32 and ARM processors, so that you 28 * can measure the performance of your software. It will also work out whether 29 * certain operations need to be done in kernel mode, and perform kernel code 30 * injection calls to make them happen. As a result, expect that any library 31 * call could potentially result in a syscall. (This is of particular note on 32 * the KZM/ARM1136, for which even reading the cycle counter must be done in 33 * kernel mode.) 34 * 35 * It also goes out of its way to ensure that there's always a cycle counter 36 * available for use. `sel4bench_init()` will start this running, and 37 * `sel4bench_destroy()` will tear it down, if necessary. 38 * 39 * Notes: 40 * - Overflow is completely ignored, even on processors that only support 41 * 32-bit counters (and thus where there is space to overflow into). If you 42 * are doing something that might overflow a counter, it's up to you to deal 43 * with that possibility. 44 * - Everything is zero-indexed. 45 */ 46 47/* 48 * CPP constants for events that are common to all architecture variants. 49 * 50 * Additional events are architecture- (and potentially processor-) specific. 51 * These may be defined in architecture or processor header files. 52 */ 53static UNUSED event_id_t GENERIC_EVENTS[] = { 54 SEL4BENCH_EVENT_CACHE_L1I_MISS, 55 SEL4BENCH_EVENT_CACHE_L1D_MISS, 56 SEL4BENCH_EVENT_TLB_L1I_MISS, 57 SEL4BENCH_EVENT_TLB_L1D_MISS, 58 SEL4BENCH_EVENT_EXECUTE_INSTRUCTION, 59 SEL4BENCH_EVENT_BRANCH_MISPREDICT, 60 SEL4BENCH_EVENT_MEMORY_ACCESS, 61}; 62 63static UNUSED char *GENERIC_EVENT_NAMES[] = { 64 "L1 i-cache misses", 65 "L1 d-cache misses", 66 "L1 i-tlb misses", 67 "L1 d-tlb misses", 68 "Instructions", 69 "Branch mispredictions", 70 "Memory accesses", 71}; 72 73static_assert(ARRAY_SIZE(GENERIC_EVENTS) == ARRAY_SIZE(GENERIC_EVENT_NAMES), 74 "event names same length as counters"); 75 76/* Number of generic counters */ 77#define SEL4BENCH_NUM_GENERIC_EVENTS ARRAY_SIZE(GENERIC_EVENTS) 78 79/** 80 * Initialise the sel4bench library. Nothing else is guaranteed to work, and 81 * may produce strange failures, if you don't do this first. 82 * 83 * Starts the cycle counter, which is guaranteed to run until 84 * `sel4bench_destroy()` is called. 85 */ 86static UNUSED void sel4bench_init(); 87 88/** 89 * Tear down the sel4bench library. Nothing else is guaranteed to work, and may 90 * produce strange failures, after you do this. 91 */ 92static UNUSED void sel4bench_destroy(); 93 94/** 95 * Query the cycle counter. If said counter needs starting, `sel4bench_init()` 96 * will have taken care of it. 97 * 98 * The returned cycle count might be since `sel4bench_init()`, if the cycle 99 * counter needs explicit starting, or since bootup, if it freewheels. 100 * 101 * @return current cycle count 102 */ 103static UNUSED ccnt_t sel4bench_get_cycle_count(); 104 105/** 106 * Query how many performance counters are supported on this CPU, excluding the 107 * cycle counter. 108 * 109 * Note that the return value is of type `seL4_Word`; consequently, this library 110 * supports a number of counters less than or equal to the machine word size in 111 * bits. 112 113 * @return quantity of counters on this CPU 114 */ 115static UNUSED seL4_Word sel4bench_get_num_counters(); 116 117/** 118 * Query the description of a counter. 119 * 120 * @param counter counter to query 121 * 122 * @return ASCII string representation of counter's description; `NULL` if 123 * counter does not exist 124 */ 125const char *sel4bench_get_counter_description(counter_t counter); 126 127/** 128 * Query the value of a counter. 129 * 130 * @param counter counter to query 131 * 132 * @return counter value 133 */ 134static UNUSED ccnt_t sel4bench_get_counter(counter_t counter); 135 136/** 137 * Query the value of a set of counters. 138 * 139 * `values` must point to an array of a length at least equal to the highest 140 * counter index to be read (to a maximum of `sel4bench_get_num_counters()`). 141 * Each counter to be read will be written to its corresponding index in this 142 * array. 143 * 144 * @param counters bitfield indicating which counter(s) in `values` to query 145 * @param values array of counters 146 * 147 * @return current cycle count as in `sel4bench_get_cycle_count()` 148 */ 149static UNUSED ccnt_t sel4bench_get_counters(counter_bitfield_t counters, 150 ccnt_t *values); 151 152/** 153 * Assign a counter to track a specific event. Events are processor-specific, 154 * though some common ones might be exposed through preprocessor constants. 155 * 156 * @param counter counter to configure 157 * @param event event to track 158 */ 159static UNUSED void sel4bench_set_count_event(counter_t counter, event_id_t id); 160 161/** 162 * Start counting events on a set of performance counters. 163 * 164 * @param counters bitfield indicating which counter(s) to start 165 */ 166static UNUSED void sel4bench_start_counters(counter_bitfield_t counters); 167 168/** 169 * Stop counting events on a set of performance counters. 170 * 171 * Note: Some processors (notably, the KZM/ARM1136) may not support this 172 * operation. 173 * 174 * @param counters bitfield indicating which counter(s) to stop 175 */ 176static UNUSED void sel4bench_stop_counters(counter_bitfield_t counters); 177 178/** 179 * Reset all performance counters to zero. Note that the cycle counter is not a 180 * performance counter, and is not reset. 181 * 182 */ 183static UNUSED void sel4bench_reset_counters(void); 184 185/** 186 * Query the number of benchmark loops required to read a given number of 187 * events. 188 * 189 * @param n_counters number of counters available 190 * @param n_events number of events of interest 191 * 192 * @return number of benchmark loops required 193 */ 194static inline int sel4bench_get_num_counter_chunks(seL4_Word n_counters, 195 seL4_Word n_events) 196{ 197 return DIV_ROUND_UP(n_events, n_counters); 198} 199 200/** 201 * Enable a chunk of the event counters passed in. 202 * 203 * A "chunk" is a quantity of events not larger than the number of performance 204 * counters available. Because we can be interested in more events than there 205 * are counters, the events are broken into numbered chunks (zero-indexed). The 206 * quantity of chunks is ceil(n_events / n_counters). 207 * 208 * Imagine we had 10 events to track but n_counters was only 8 (i.e., an 8-bit 209 * machine). 210 * 211 * +--chunk 1-+--chunk 0-+ 212 * | xxxxxxxx | xxxxxxxx | 213 * +---------------------+ 214 * 215 * sel4bench_enable_counters(10, events, 0, 8) would return 255: 216 * 217 * +--chunk 1-+--chunk 0-+ 218 * | 00000000 | 11111111 | 219 * +---------------------+ 220 * 221 * sel4bench_enable_counters(10, events, 1, 8) would return 3: 222 * 223 * +--chunk 1-+--chunk 0-+ 224 * | 00000011 | 00000000 | 225 * +---------------------+ 226 * 227 * `n_counters` is a parameter because calling `sel4bench_get_num_counters()` 228 * can be expensive, but it should be the same as the function's return value. 229 * 230 * @param n_events number of events of interest 231 * @param event events to track 232 * @param chunk chunk number to enable 233 * @param n_counters number of counters available 234 * 235 * @return mask usable to manipulate the counters enabled 236 */ 237static inline 238counter_bitfield_t sel4bench_enable_counters(seL4_Word n_events, 239 event_id_t *events, 240 seL4_Word chunk, 241 seL4_Word n_counters) 242{ 243 assert(chunk < sel4bench_get_num_counter_chunks(n_counters, n_events)); 244 assert(n_counters == sel4bench_get_num_counters()); 245 counter_bitfield_t mask = 0; 246 247 for (seL4_Word i = 0; i < n_counters; i++) { 248 seL4_Word counter = chunk * n_counters + i; 249 if (counter >= n_events) { 250 break; 251 } 252 sel4bench_set_count_event(i, events[counter]); 253 mask |= BIT(i); 254 } 255 256 sel4bench_reset_counters(); 257 sel4bench_start_counters(mask); 258 return mask; 259} 260 261/** 262 * Read and stop the counters set in `mask`. 263 * 264 * `n_counters` is a parameter because calling `sel4bench_get_num_counters()` 265 * can be expensive, but it should be the same as the function's return value. 266 * 267 * `results` must point to an array the size of n_events, as passed to 268 * `sel4bench_enable_counters()`. 269 * 270 * @param mask as returned by `sel4bench_enable_counters()` 271 * @param chunk as passed to `sel4bench_enable_counters()` 272 * @param n_counters number of counters available 273 * @param results array of counter results 274 */ 275static inline void sel4bench_read_and_stop_counters(counter_bitfield_t mask, 276 seL4_Word chunk, 277 seL4_Word n_counters, 278 ccnt_t results[]) 279{ 280 sel4bench_get_counters(mask, &results[chunk * n_counters]); 281 sel4bench_stop_counters(mask); 282} 283 284/** 285 * Call `sel4bench_enable_counters()` on the `GENERIC_EVENTS` supplied for all 286 * platforms by this library. 287 * 288 * See `sel4bench_enable_counters()` for parameters and return value. 289 */ 290static inline counter_bitfield_t sel4bench_enable_generic_counters( 291 seL4_Word chunk, seL4_Word n_counters) 292{ 293 return sel4bench_enable_counters(SEL4BENCH_NUM_GENERIC_EVENTS, 294 GENERIC_EVENTS, chunk, n_counters); 295} 296 297/** 298 * Call `sel4bench_get_num_counter_chunks()` for the `GENERIC_EVENTS` supplied 299 * for all platforms by this library. 300 * 301 * See `sel4bench_get_num_counter_chunks()` for parameters and return value. 302 */ 303static inline int sel4bench_get_num_generic_counter_chunks(seL4_Word n_counters) 304{ 305 return sel4bench_get_num_counter_chunks(n_counters, 306 SEL4BENCH_NUM_GENERIC_EVENTS); 307} 308