/*-
 * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2016, 2017 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Userspace (libc) side of the vdso timecounter readers for x86: reads
 * the hardware counter (TSC, HPET, or Hyper-V reference TSC) selected
 * by the kernel-exported struct vdso_timehands, without entering the
 * kernel.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/lib/libc/x86/sys/__vdso_gettc.c 344158 2019-02-15 11:36:16Z kib $");

#include <sys/param.h>
#include "namespace.h"
#include <sys/capsicum.h>
#include <sys/elf.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/vdso.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include "un-namespace.h"
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
#include <dev/acpica/acpi_hpet.h>
#ifdef WANT_HYPERV
#include <dev/hyperv/hyperv.h>
#endif
#include "libc_private.h"

/*
 * Which load memory barrier instruction to issue before RDTSC.
 * Lazily initialized by init_fence() on the first rdtsc_mb() call.
 */
static enum LMB {
	LMB_UNKNOWN,	/* not yet probed */
	LMB_NONE,	/* i386 CPU without SSE2: no fence available */
	LMB_MFENCE,	/* non-Intel vendor: use MFENCE */
	LMB_LFENCE	/* Intel vendor: LFENCE suffices */
} lfence_works = LMB_UNKNOWN;

/*
 * Execute CPUID with the given leaf in %eax and store the results
 * (%eax, %ebx, %ecx, %edx) into p[0..3].  On i386, %ebx is saved and
 * restored around CPUID since it may be reserved (e.g. as the PIC
 * register) and cannot be named as an asm output there.
 */
static void
cpuidp(u_int leaf, u_int p[4])
{

	__asm __volatile(
#if defined(__i386__)
	    " pushl %%ebx\n"
#endif
	    " cpuid\n"
#if defined(__i386__)
	    " movl %%ebx,%1\n"
	    " popl %%ebx"
#endif
	    : "=a" (p[0]),
#if defined(__i386__)
	    "=r" (p[1]),
#elif defined(__amd64__)
	    "=b" (p[1]),
#else
#error "Arch"
#endif
	    "=c" (p[2]), "=d" (p[3])
	    : "0" (leaf));
}

/*
 * Choose the barrier flavor by CPU vendor: LFENCE on Intel CPUs,
 * MFENCE on everything else.  The vendor string from CPUID leaf 0
 * comes back in %ebx/%edx/%ecx order; p[1..3] as filled by cpuidp()
 * is %ebx/%ecx/%edx, so "GenuineIntel" is compared in the permuted
 * form below.
 */
static enum LMB
select_lmb(void)
{
	u_int p[4];
	/* Not a typo, string matches our cpuidp() registers use. */
	static const char intel_id[] = "GenuntelineI";

	cpuidp(0, p);
	return (memcmp(p + 1, intel_id, sizeof(intel_id) - 1) == 0 ?
	    LMB_LFENCE : LMB_MFENCE);
}

/*
 * One-time probe setting lfence_works.  On i386, first detect CPUID
 * support by toggling EFLAGS.ID (bit 21): if the flipped bit sticks
 * after a pushfl/popfl round trip, CPUID exists.  Then require SSE2
 * (which implies LFENCE/MFENCE availability) before selecting a
 * barrier; otherwise leave LMB_NONE.  On amd64 both CPUID and SSE2
 * are architectural, so only the vendor selection is needed.
 */
static void
init_fence(void)
{
#if defined(__i386__)
	u_int cpuid_supported, p[4];

	lfence_works = LMB_NONE;
	__asm __volatile(
	    " pushfl\n"
	    " popl %%eax\n"
	    " movl %%eax,%%ecx\n"
	    " xorl $0x200000,%%eax\n"
	    " pushl %%eax\n"
	    " popfl\n"
	    " pushfl\n"
	    " popl %%eax\n"
	    " xorl %%eax,%%ecx\n"
	    " je 1f\n"
	    " movl $1,%0\n"
	    " jmp 2f\n"
	    "1: movl $0,%0\n"
	    "2:\n"
	    : "=r" (cpuid_supported) : : "eax", "ecx", "cc");
	if (cpuid_supported) {
		cpuidp(0x1, p);
		if ((p[3] & CPUID_SSE2) != 0)
			lfence_works = select_lmb();
	}
#elif defined(__amd64__)
	lfence_works = select_lmb();
#else
#error "Arch"
#endif
}

/*
 * Issue the memory barrier that orders loads around the following
 * RDTSC, performing the lazy probe on first use.  The common
 * (already-initialized LFENCE) path is predicted taken.
 */
static void
rdtsc_mb(void)
{

again:
	if (__predict_true(lfence_works == LMB_LFENCE)) {
		lfence();
		return;
	} else if (lfence_works == LMB_MFENCE) {
		mfence();
		return;
	} else if (lfence_works == LMB_NONE) {
		return;
	}
	init_fence();
	goto again;
}

/*
 * Read the TSC and return the 32 bits starting at bit th_x86_shift:
 * SHRD shifts the 64-bit edx:eax value right by %cl and leaves the
 * low 32 result bits in %eax.  Used when the kernel asks for a
 * right-shifted TSC (th_x86_shift > 0).
 */
static u_int
__vdso_gettc_rdtsc_low(const struct vdso_timehands *th)
{
	u_int rv;

	rdtsc_mb();
	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
	return (rv);
}

/* Fenced read of the low 32 bits of the TSC (no shift). */
static u_int
__vdso_rdtsc32(void)
{

	rdtsc_mb();
	return (rdtsc32());
}

#define	HPET_DEV_MAP_MAX	10
/*
 * Per-unit cached mappings of the HPET register pages.  NULL means
 * "not yet tried", MAP_FAILED means "tried and unusable" (so callers
 * do not retry), otherwise a read-only mapping of /dev/hpet<u>.
 */
static volatile char *hpet_dev_map[HPET_DEV_MAP_MAX];

/*
 * Map the registers of HPET unit 'u' by opening and mmap()ing
 * /dev/hpet<u>.  The device name is built by hand (digits generated
 * in reverse, then the digit substring is reversed in place) to avoid
 * calling printf-family code from this low-level path.  The result is
 * published into hpet_dev_map[u] with a release CAS; if another
 * thread won the race, our fresh mapping is discarded.
 */
static void
__vdso_init_hpet(uint32_t u)
{
	static const char devprefix[] = "/dev/hpet";
	char devname[64], *c, *c1, t;
	volatile char *new_map, *old_map;
	unsigned int mode;
	uint32_t u1;
	int fd;

	c1 = c = stpcpy(devname, devprefix);
	u1 = u;
	do {
		*c++ = u1 % 10 + '0';
		u1 /= 10;
	} while (u1 != 0);
	*c = '\0';
	/* Reverse the digits appended above into the correct order. */
	for (c--; c1 != c; c1++, c--) {
		t = *c1;
		*c1 = *c;
		*c = t;
	}

	old_map = hpet_dev_map[u];
	if (old_map != NULL)
		return;

	/*
	 * Explicitly check for the capability mode to avoid
	 * triggering trap_enocap on the device open by absolute path.
	 */
	if ((cap_getmode(&mode) == 0 && mode != 0) ||
	    (fd = _open(devname, O_RDONLY)) == -1) {
		/* Prevent the caller from re-entering. */
		atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
		    (uintptr_t)old_map, (uintptr_t)MAP_FAILED);
		return;
	}

	new_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
	_close(fd);
	/* Publish; on a lost race, unmap our now-redundant mapping. */
	if (atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
	    (uintptr_t)old_map, (uintptr_t)new_map) == 0 &&
	    new_map != MAP_FAILED)
		munmap((void *)new_map, PAGE_SIZE);
}

#ifdef WANT_HYPERV

#define	HYPERV_REFTSC_DEVPATH	"/dev/" HYPERV_REFTSC_DEVNAME

/*
 * NOTE:
 * We use 'NULL' for this variable to indicate that initialization
 * is required.  And if this variable is 'MAP_FAILED', then Hyper-V
 * reference TSC can not be used, e.g. in misconfigured jail.
 */
static struct hyperv_reftsc *hyperv_ref_tsc;

/*
 * Map the Hyper-V reference TSC page.  Skips the open entirely in
 * capability mode (same rationale as __vdso_init_hpet) and records
 * MAP_FAILED on any failure so the caller does not retry.
 */
static void
__vdso_init_hyperv_tsc(void)
{
	int fd;
	unsigned int mode;

	if (cap_getmode(&mode) == 0 && mode != 0)
		goto fail;

	fd = _open(HYPERV_REFTSC_DEVPATH, O_RDONLY);
	if (fd < 0)
		goto fail;
	hyperv_ref_tsc = mmap(NULL, sizeof(*hyperv_ref_tsc), PROT_READ,
	    MAP_SHARED, fd, 0);
	_close(fd);

	return;
fail:
	/* Prevent the caller from re-entering. */
	hyperv_ref_tsc = MAP_FAILED;
}

/*
 * Compute the counter value from the Hyper-V reference TSC page:
 * ((tsc * tsc_scale) >> 64) + tsc_ofs, with the 128-bit product
 * formed by MULQ.  The page is read under a sequence counter:
 * tsc_seq is loaded with acquire semantics, the scale/offset are
 * read, and the read is retried if tsc_seq changed meanwhile.  A
 * zero sequence means the page is invalid (returns ENOSYS).
 */
static int
__vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc)
{
	uint64_t disc, ret, tsc, scale;
	uint32_t seq;
	int64_t ofs;

	while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {
		scale = tsc_ref->tsc_scale;
		ofs = tsc_ref->tsc_ofs;

		rdtsc_mb();
		tsc = rdtsc();

		/* ret = ((tsc * scale) >> 64) + ofs */
		__asm__ __volatile__ ("mulq %3" :
		    "=d" (ret), "=a" (disc) :
		    "a" (tsc), "r" (scale));
		ret += ofs;

		atomic_thread_fence_acq();
		if (tsc_ref->tsc_seq == seq) {
			*tc = ret;
			return (0);
		}

		/* Sequence changed; re-sync. */
	}
	return (ENOSYS);
}

#endif	/* WANT_HYPERV */

/*
 * Read the current timecounter value for the algorithm selected by
 * the kernel in th->th_algo, storing it in *tc.  Returns 0 on
 * success or ENOSYS when the counter cannot be read from userspace
 * (unknown algorithm, HPET index out of range, device inaccessible),
 * in which case the caller falls back to the syscall path.
 */
#pragma weak __vdso_gettc
int
__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
{
	volatile char *map;
	uint32_t idx;

	switch (th->th_algo) {
	case VDSO_TH_ALGO_X86_TSC:
		*tc = th->th_x86_shift > 0 ? __vdso_gettc_rdtsc_low(th) :
		    __vdso_rdtsc32();
		return (0);
	case VDSO_TH_ALGO_X86_HPET:
		idx = th->th_x86_hpet_idx;
		if (idx >= HPET_DEV_MAP_MAX)
			return (ENOSYS);
		map = (volatile char *)atomic_load_acq_ptr(
		    (volatile uintptr_t *)&hpet_dev_map[idx]);
		if (map == NULL) {
			__vdso_init_hpet(idx);
			map = (volatile char *)atomic_load_acq_ptr(
			    (volatile uintptr_t *)&hpet_dev_map[idx]);
		}
		if (map == MAP_FAILED)
			return (ENOSYS);
		*tc = *(volatile uint32_t *)(map + HPET_MAIN_COUNTER);
		return (0);
#ifdef WANT_HYPERV
	case VDSO_TH_ALGO_X86_HVTSC:
		if (hyperv_ref_tsc == NULL)
			__vdso_init_hyperv_tsc();
		if (hyperv_ref_tsc == MAP_FAILED)
			return (ENOSYS);
		return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc));
#endif
	default:
		return (ENOSYS);
	}
}

/*
 * Return the address of the kernel's vdso_timekeep page via the
 * AT_TIMEKEEP ELF auxiliary vector entry.
 */
#pragma weak __vdso_gettimekeep
int
__vdso_gettimekeep(struct vdso_timekeep **tk)
{

	return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));
}