1/* 2 * Copyright 2017, Data61 3 * Commonwealth Scientific and Industrial Research Organisation (CSIRO) 4 * ABN 41 687 119 230. 5 * 6 * This software may be distributed and modified according to the terms of 7 * the BSD 2-Clause license. Note that NO WARRANTY is provided. 8 * See "LICENSE_BSD2.txt" for details. 9 * 10 * @TAG(DATA61_BSD) 11 */ 12#pragma once 13 14#include <autoconf.h> 15#include <stdint.h> 16#include <utils/util.h> 17 18//function attributes 19//ultra-short, time-sensitive functions 20#define FASTFN inline __attribute__((always_inline)) 21 22//short, cache-sensitive functions (note: short means one cache line) 23#define CACHESENSFN __attribute__((noinline, aligned(64))) 24 25//functions that will be passed to seL4_DebugRun() -- fast, but obviously not inlined 26#define KERNELFN __attribute__((noinline, flatten)) 27 28#define IN_TX_BIT BIT(0) 29#define IN_TXCP_BIT BIT(1) 30 31#include "events.h" 32 33//CPUID leaf node numbers 34enum { 35 IA32_CPUID_LEAF_BASIC = 0, 36 IA32_CPUID_LEAF_MODEL = 1, 37 IA32_CPUID_LEAF_PMC = 0xA, 38 IA32_CPUID_LEAF_EXTENDED = 0x80000000, 39}; 40 41//CPUID.0 "GenuineIntel" 42#define IA32_CPUID_BASIC_MAGIC_EBX 0x756E6547 43#define IA32_CPUID_BASIC_MAGIC_ECX 0x6C65746E 44#define IA32_CPUID_BASIC_MAGIC_EDX 0x49656E69 45 46//CPUID.1 Family and Model ID macros and type. 47#define FAMILY(x) ( (x).family == 0xF ? ( (x).ex_family + (x).family) : (x).family ) 48#define MODEL(x) ( ((x).family == 0xF || (x).family == 0x6) ? (((x).ex_model << 4) + (x).model ) : (x).model ) 49#define IA32_CPUID_FAMILY_P6 0x6 50typedef union { 51 struct { 52 seL4_Word stepping : 4; 53 seL4_Word model : 4; 54 seL4_Word family : 4; 55 seL4_Word type : 2; 56 seL4_Word reserved1 : 2; 57 seL4_Word ex_model : 4; 58 seL4_Word ex_family : 8; 59 seL4_Word reserved2 : 4; 60 }; 61 uint32_t raw; 62} ia32_cpuid_model_info_t; 63 64//CPUID.PMC Performance-monitoring macros and types 65typedef union { 66 struct { 67 uint8_t pmc_version_id; 68 uint8_t gp_pmc_count_per_core; 69 uint8_t gp_pmc_bit_width; 70 uint8_t ebx_bit_vector_length; 71 }; 72 uint32_t raw; 73} ia32_cpuid_leaf_pmc_eax_t; 74 75//Control-register constants 76#define IA32_CR4_PCE 8 77 78//PMC MSRs 79#define IA32_MSR_PMC_PERFEVTSEL_BASE 0x186 80#define IA32_MSR_PMC_PERFEVTCNT_BASE 0x0C1 81typedef union { 82 struct { 83 uint16_t event; 84 union { 85 struct { 86 uint8_t USR : 1; 87 uint8_t OS : 1; 88 uint8_t E : 1; 89 uint8_t PC : 1; 90 uint8_t INT : 1; 91 uint8_t res : 1; 92 uint8_t EN : 1; 93 uint8_t INV : 1; 94 }; 95 uint8_t flags; 96 }; 97 uint8_t cmask; 98 }; 99 uint32_t raw; 100} ia32_pmc_perfevtsel_t; 101 102//Convenient execution of CPUID instruction. The first version isn't volatile, so is for querying the processor; the second version just serialises. 103//This looks slow, but gcc inlining is smart enough to optimise away all the memory references, and takes unused information into account. 104static FASTFN void sel4bench_private_cpuid(uint32_t leaf, uint32_t subleaf, uint32_t * eax, uint32_t * ebx, uint32_t * ecx, uint32_t * edx) 105{ 106 asm ( 107 "cpuid" 108 : "=a"(*eax) /* output eax */ 109 , "=b"(*ebx) /* output ebx */ 110 , "=c"(*ecx) /* output ecx */ 111 , "=d"(*edx) /* output edx */ 112 : "a" (leaf) /* input query leaf */ 113 , "c" (subleaf) /* input query subleaf */ 114 ); 115} 116static FASTFN void sel4bench_private_cpuid_serial() 117{ 118 //set leaf and subleaf to 0 for predictability 119 uint32_t eax = 0; 120 uint32_t ecx = 0; 121 asm volatile ( 122 "cpuid" 123 : "+a"(eax) /* eax = 0 and gets clobbered */ 124 , "+c"(ecx) /* ecx = 0 and gets clobbered */ 125 : /* no other inputs to this version */ 126 : "%ebx" /* clobber ebx */ 127 , "%edx" /* clobber edx */ 128 , "cc" /* clobber condition code */ 129 ); 130} 131static FASTFN void sel4bench_private_lfence() 132{ 133 asm volatile("lfence"); 134} 135 136static FASTFN uint64_t sel4bench_private_rdtsc() 137{ 138 uint32_t lo, hi; 139 asm volatile ( 140 "rdtsc" 141 : "=a"(lo), "=d"(hi) 142 ); 143 return (((uint64_t)hi << 32ull) | (uint64_t)lo); 144} 145 146static FASTFN uint64_t sel4bench_private_rdpmc(uint32_t counter) 147{ 148 uint32_t hi, lo; 149 asm volatile ( 150 "rdpmc" 151 : "=a"(lo), "=d"(hi) 152 : "c"(counter) 153 ); 154 return (((uint64_t)hi << 32ull) | (uint64_t)lo); 155} 156 157//Serialization instruction for before and after reading PMCs 158//See comment in arch/sel4bench.h for details. 159#ifdef SEL4BENCH_STRICT_PMC_SERIALIZATION 160#define sel4bench_private_serialize_pmc sel4bench_private_cpuid_serial 161#else //SEL4BENCH_STRICT_PMC_SERIALIZATION 162#define sel4bench_private_serialize_pmc sel4bench_private_lfence 163#endif //SEL4BENCH_STRICT_PMC_SERIALIZATION 164 165/* Hide these definitions if using kernel exported PMC to prevent warnings */ 166#ifndef CONFIG_EXPORT_PMC_USER 167//enable user-level pmc access 168static KERNELFN void sel4bench_private_enable_user_pmc(void* arg) 169{ 170#ifdef CONFIG_ARCH_X86_64 171 172 uint64_t dummy; 173 asm volatile ( 174 "movq %%cr4, %0;" 175 "orq %[pce], %0;" 176 "movq %0, %%cr4;" 177 : "=r" (dummy) 178 : [pce] "i" BIT(IA32_CR4_PCE) 179 : "cc" 180 ); 181#else 182 183 uint32_t dummy; 184 asm volatile ( 185 "movl %%cr4, %0;" /* read CR4 */ 186 "orl %[pce], %0;" /* enable PCE flag */ 187 "movl %0, %%cr4;" /* write CR4 */ 188 : "=r" (dummy) /* fake output to ask GCC for a register */ 189 : [pce] "i" BIT(IA32_CR4_PCE) /* input PCE flag */ 190 : "cc" /* clobber condition code */ 191 ); 192#endif 193} 194 195//disable user-level pmc access 196static KERNELFN void sel4bench_private_disable_user_pmc(void* arg) 197{ 198#ifdef CONFIG_ARCH_X86_64 199 uint64_t dummy; 200 asm volatile ( 201 "movq %%cr4, %0;" 202 "andq %[pce], %0;" 203 "movq %0, %%cr4;" 204 : "=r" (dummy) 205 : [pce] "i" (~BIT(IA32_CR4_PCE)) 206 : "cc" 207 ); 208 209#else 210 uint32_t dummy; 211 asm volatile ( 212 "movl %%cr4, %0;" /* read CR4 */ 213 "andl %[pce], %0;" /* enable PCE flag */ 214 "movl %0, %%cr4;" /* write CR4 */ 215 : "=r" (dummy) /* fake output to ask GCC for a register */ 216 : [pce] "i" (~BIT(IA32_CR4_PCE)) /* input PCE flag */ 217 : "cc" /* clobber condition code */ 218 ); 219#endif 220} 221#endif 222 223#ifndef CONFIG_KERNEL_X86_DANGEROUS_MSR 224//read an MSR 225static KERNELFN void sel4bench_private_rdmsr(void* arg) 226{ 227 uint32_t* msr = (uint32_t*)arg; 228 229 asm volatile ( 230 "rdmsr" 231 : "=a" (msr[1]) /* output low */ 232 , "=d" (msr[2]) /* output high */ 233 : "c" (msr[0]) /* input MSR index */ 234 ); 235} 236 237//write an MSR 238static KERNELFN void sel4bench_private_wrmsr(void* arg) 239{ 240 uint32_t* msr = (uint32_t*)arg; 241 242 asm volatile ( 243 "wrmsr" 244 : /* no output */ 245 : "a" (msr[1]) /* input low */ 246 , "d" (msr[2]) /* input high */ 247 , "c" (msr[0]) /* input MSR index */ 248 ); 249} 250#endif 251 252//generic event tables for lookup fn below 253//they use direct event numbers, rather than the constants in events.h, because it's smaller 254static seL4_Word SEL4BENCH_IA32_WESTMERE_EVENTS[5] = { 255 0x0280, //CACHE_L1I_MISS 256 0x0151, //CACHE_L1D_MISS, must use counter 0 or 1 257 0x20C8, //TLB_L1I_MISS 258 0x80CB, //TLB_L1D_MISS 259 0x5FCB //SEL4BENCH_IA32_WESTMERE_EVENT_CACHE_|{L1D_HIT,L2_HIT,L3P_HIT,L3_HIT,L3_MISS,LFB_HIT}_R 260}; 261static seL4_Word SEL4BENCH_IA32_NEHALEM_EVENTS[5] = { 262 0x0280, //CACHE_L1I_MISS 263 0x0151, //CACHE_L1D_MISS, must use counter 0 or 1 264 0x20C8, //TLB_L1I_MISS 265 0x80CB, //TLB_L1D_MISS 266 0x5FCB //SEL4BENCH_IA32_NEHALEM_EVENT_CACHE_|{L1D_HIT,L2_HIT,L3P_HIT,L3_HIT,L3_MISS,LFB_HIT}_R 267}; 268static seL4_Word SEL4BENCH_IA32_CORE2_EVENTS[5] = { 269 0x0081, //CACHE_L1I_MISS 270 0x01CB, //CACHE_L1D_MISS, must use counter 0 271 0x00C9, //TLB_L1I_MISS 272 0x10CB, //TLB_L1D_MISS, must use counter 0 273 0x03C0 //SEL4BENCH_IA32_CORE2_EVENT_RETIRE_MEMORY_|{READ,WRITE} 274}; 275static seL4_Word SEL4BENCH_IA32_CORE_EVENTS[5] = { 276 0x0081, //CACHE_L1I_MISS 277 0x0000, //CACHE_L1D_MISS, not available on CORE 278 0x0085, //TLB_L1I_MISS 279 0x0049, //TLB_L1D_MISS 280 0x0143 //MEMORY_ACCESS 281}; 282static seL4_Word SEL4BENCH_IA32_P6_EVENTS[5] = { 283 0x0081, //CACHE_L1I_MISS 284 0x0045, //CACHE_L1D_MISS 285 0x0085, //TLB_L1I_MISS 286 0x0000, //TLB_L1D_MISS, not available on P6 287 0x0043 //MEMORY_ACCESS 288}; 289static seL4_Word SEL4BENCH_IA32_HASWELL_EVENTS[5] = { 290 0x0280, //CACHE_L1I_MISS 291 0x0151, //CACHE_L1D_MISS, must use counter 0 or 1 292 0x0085, //TLB_L1I_MISS 293 0x0049, //TLB_L1D_MISS 294 0x412E //LLC_MISS 295}; 296static seL4_Word SEL4BENCH_IA32_BROADWELL_EVENTS[5] = { 297 0x0280, //ICACHE.MISSES 298 0x0151, //L1D.REPLACEMENT 299 0x2185, //ITLB_MISSES.MISS_CAUSES_A_WALK | ITLB_MISSES.STLB_HIT_4K 300 0x0000, //No combined load/store dTLB miss counter available 301 0x412E //LONGEST_LAT_CACHE.MISS 302}; 303static seL4_Word SEL4BENCH_IA32_SKYLAKE_EVENTS[5] = { 304 0x0000, //No combined tag/data iCache miss counter available 305 0x0151, //L1D.REPLACEMENT 306 0x2185, //ITLB_MISSES.MISS_CAUSES_A_WALK | ITLB_MISSES.STLB_HIT 307 0x0000, //No combined load/store dTLB miss counter available 308 0x412E //LONGEST_LAT_CACHE.MISS 309}; 310 311static FASTFN seL4_Word sel4bench_private_lookup_event(event_id_t event) 312{ 313 if ((SEL4BENCH_EVENT_GENERIC_MASK & event) == SEL4BENCH_EVENT_GENERIC_MASK) { 314 uint32_t dummy = 0; 315 ia32_cpuid_model_info_t model_info = { .raw = 0 }; 316 sel4bench_private_cpuid(IA32_CPUID_LEAF_MODEL, 0, &model_info.raw, &dummy, &dummy, &dummy); 317 318 //we should be a P6 319 assert(FAMILY(model_info) == IA32_CPUID_FAMILY_P6); 320 321 uint8_t model = MODEL(model_info); 322 event = event & ~SEL4BENCH_EVENT_GENERIC_MASK; 323 324 //Using the model summary on http://www.sandpile.org/x86/cpuid.htm#level_0000_0001h 325 //Let's hope it's accurate... 326 //We are also pretending Atoms don't exist 327 328 //P3 or PM 329 if (model <= 0x0D || model == 0x15) { 330 return SEL4BENCH_IA32_P6_EVENTS[event]; 331 } 332 333 switch (model) { 334 //CORE 335 case 0x0E: 336 return SEL4BENCH_IA32_CORE_EVENTS[event]; 337 338 //CORE2 339 case 0x0F: 340 case 0x16: 341 case 0x17: 342 case 0x1D: 343 return SEL4BENCH_IA32_CORE2_EVENTS[event]; 344 345 //NEHALEM 346 case 0x1A: 347 case 0x1E: 348 case 0x1F: 349 case 0x2E: 350 return SEL4BENCH_IA32_NEHALEM_EVENTS[event]; 351 352 //WESTMERE 353 case 0x25: 354 case 0x2C: 355 case 0x2F: 356 return SEL4BENCH_IA32_WESTMERE_EVENTS[event]; 357 358 //SANDY BRIDGE 359 case 0x2A: 360 case 0x2D: 361 return 0x0000; //TODO 362 363 //IVY BRIDGE 364 case 0x3A: 365 case 0x3E: 366 return 0x0000; //TODO 367 368 //HASWELL 369 case 0x3C: 370 case 0x3F: 371 case 0x45: 372 case 0x46: 373 return SEL4BENCH_IA32_HASWELL_EVENTS[event]; 374 375 //BROADWELL 376 case 0x3D: 377 case 0x47: 378 case 0x4F: 379 case 0x56: 380 return SEL4BENCH_IA32_BROADWELL_EVENTS[event]; 381 382 //SKYLAKE 383 case 0x4E: 384 case 0x5E: 385 return SEL4BENCH_IA32_SKYLAKE_EVENTS[event]; 386 387 //Unknown 388 default: 389 return 0x0000; 390 } 391 } else { 392 return event; 393 } 394} 395