1/*- 2 * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org> 3 * Copyright (c) 2016, 2017, 2019 The FreeBSD Foundation 4 * All rights reserved. 5 * 6 * Portions of this software were developed by Konstantin Belousov 7 * under sponsorship from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include "namespace.h"
#include <sys/capsicum.h>
#include <sys/elf.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/vdso.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include "un-namespace.h"
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
#include <dev/acpica/acpi_hpet.h>
#ifdef WANT_HYPERV
#include <dev/hyperv/hyperv.h>
#endif
#include <x86/ifunc.h>
#include "libc_private.h"

/*
 * Read the 64-bit TSC and shift it right by th->th_x86_shift, returning
 * the low 32 bits of the shifted value.  SHRD pulls the discarded high
 * bits of EDX into EAX, so the result is (tsc >> shift) & 0xffffffff.
 * The shift count is passed in CL as required by SHRD.
 */
static inline u_int
rdtsc_low(const struct vdso_timehands *th)
{
	u_int rv;

	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
	return (rv);
}

/*
 * Same as rdtsc_low() but using RDTSCP, which waits for prior
 * instructions to retire and so needs no separate fence.  RDTSCP
 * overwrites ECX with TSC_AUX, so the shift count is passed in EDI
 * and moved into ECX after the read.
 */
static inline u_int
rdtscp_low(const struct vdso_timehands *th)
{
	u_int rv;

	__asm __volatile("rdtscp; movl %%edi,%%ecx; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "D" (th->th_x86_shift) : "ecx", "edx");
	return (rv);
}

/* Shifted TSC read preceded by LFENCE (Intel's recommended barrier). */
static u_int
rdtsc_low_mb_lfence(const struct vdso_timehands *th)
{
	lfence();
	return (rdtsc_low(th));
}

/* Shifted TSC read preceded by MFENCE (AMD's recommended barrier). */
static u_int
rdtsc_low_mb_mfence(const struct vdso_timehands *th)
{
	mfence();
	return (rdtsc_low(th));
}

/* Shifted TSC read with no barrier, for CPUs without SSE2 fences. */
static u_int
rdtsc_low_mb_none(const struct vdso_timehands *th)
{
	return (rdtsc_low(th));
}

/* Unshifted low-32-bit TSC read preceded by LFENCE. */
static u_int
rdtsc32_mb_lfence(void)
{
	lfence();
	return (rdtsc32());
}

/* Unshifted low-32-bit TSC read preceded by MFENCE. */
static u_int
rdtsc32_mb_mfence(void)
{
	mfence();
	return (rdtsc32());
}

/* Unshifted low-32-bit TSC read with no barrier. */
static u_int
rdtsc32_mb_none(void)
{
	return (rdtsc32());
}

/*
 * Out-of-line wrapper around the rdtscp32() inline so its address can
 * be stored in the selector table below.
 */
static u_int
rdtscp32_(void)
{
	return (rdtscp32());
}

/*
 * One pair of TSC readers (unshifted and shifted) per serialization
 * strategy; tsc_selector_idx() picks the row appropriate for the CPU.
 */
struct tsc_selector_tag {
	u_int (*ts_rdtsc32)(void);
	u_int (*ts_rdtsc_low)(const struct vdso_timehands *);
};

static const struct tsc_selector_tag tsc_selector[] = {
	[0] = {	/* Intel, LFENCE */
		.ts_rdtsc32 =	rdtsc32_mb_lfence,
		.ts_rdtsc_low =	rdtsc_low_mb_lfence,
	},
	[1] = {	/* AMD, MFENCE */
		.ts_rdtsc32 =	rdtsc32_mb_mfence,
		.ts_rdtsc_low =	rdtsc_low_mb_mfence,
	},
	[2] = {	/* No SSE2 */
		.ts_rdtsc32 =	rdtsc32_mb_none,
		.ts_rdtsc_low =	rdtsc_low_mb_none,
	},
	[3] = {	/* RDTSCP */
		.ts_rdtsc32 =	rdtscp32_,
		.ts_rdtsc_low =	rdtscp_low,
	},
};

/*
 * Select the tsc_selector[] row for this CPU: RDTSCP if available,
 * otherwise the vendor-appropriate fence, falling back to no fence
 * when SSE2 (and thus LFENCE/MFENCE) is absent.
 */
static int
tsc_selector_idx(u_int cpu_feature)
{
	u_int amd_feature, cpu_exthigh, cpu_id, p[4], v[3];
	static const char amd_id[] = "AuthenticAMD";
	static const char hygon_id[] = "HygonGenuine";
	bool amd_cpu;

	if (cpu_feature == 0)
		return (2);	/* should not happen due to RDTSC */

	/*
	 * CPUID leaf 0 returns the vendor string in EBX:EDX:ECX order;
	 * reassemble it into v[] for the comparison against the 12-byte
	 * vendor ids (the sizeof - 1 excludes the NUL terminator).
	 */
	do_cpuid(0, p);
	v[0] = p[1];
	v[1] = p[3];
	v[2] = p[2];
	amd_cpu = memcmp(v, amd_id, sizeof(amd_id) - 1) == 0 ||
	    memcmp(v, hygon_id, sizeof(hygon_id) - 1) == 0;

	/* cpu_id is fetched here but not consulted in this revision. */
	do_cpuid(1, p);
	cpu_id = p[0];

	/* cpu_feature was already checked non-zero above. */
	if (cpu_feature != 0) {
		do_cpuid(0x80000000, p);
		cpu_exthigh = p[0];
	} else {
		cpu_exthigh = 0;
	}
	if (cpu_exthigh >= 0x80000001) {
		do_cpuid(0x80000001, p);
		amd_feature = p[3];
	} else {
		amd_feature = 0;
	}

	if ((amd_feature & AMDID_RDTSCP) != 0)
		return (3);
	if ((cpu_feature & CPUID_SSE2) == 0)
		return (2);
	return (amd_cpu ? 1 : 0);
}

/*
 * ifunc resolvers: the TSC reader variant is chosen once, at dynamic
 * relocation time, based on the detected CPU.
 */
DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc_low,
    (const struct vdso_timehands *th), static)
{
	return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc_low);
}

DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc32, (void), static)
{
	return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc32);
}

/*
 * Per-unit cache of mmap'ed HPET register pages.  NULL means "not yet
 * initialized", MAP_FAILED means "tried and failed, do not retry".
 */
#define	HPET_DEV_MAP_MAX	10
static volatile char *hpet_dev_map[HPET_DEV_MAP_MAX];

/*
 * Open /dev/hpet<u> and map its first page read-only, publishing the
 * mapping in hpet_dev_map[u].  On any failure MAP_FAILED is stored so
 * callers do not retry on every clock read.
 */
static void
__vdso_init_hpet(uint32_t u)
{
	static const char devprefix[] = "/dev/hpet";
	char devname[64], *c, *c1, t;
	volatile char *new_map, *old_map;
	unsigned int mode;
	uint32_t u1;
	int fd;

	/*
	 * Format the unit number by hand (no snprintf in this context):
	 * emit digits least-significant first, then reverse them in place.
	 */
	c1 = c = stpcpy(devname, devprefix);
	u1 = u;
	do {
		*c++ = u1 % 10 + '0';
		u1 /= 10;
	} while (u1 != 0);
	*c = '\0';
	for (c--; c1 != c; c1++, c--) {
		t = *c1;
		*c1 = *c;
		*c = t;
	}

	old_map = hpet_dev_map[u];
	if (old_map != NULL)
		return;

	/*
	 * Explicitly check for the capability mode to avoid
	 * triggering trap_enocap on the device open by absolute path.
	 */
	if ((cap_getmode(&mode) == 0 && mode != 0) ||
	    (fd = _open(devname, O_RDONLY)) == -1) {
		/* Prevent the caller from re-entering. */
		atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
		    (uintptr_t)old_map, (uintptr_t)MAP_FAILED);
		return;
	}

	/*
	 * Publish the mapping with a single CAS; if another thread won
	 * the race, discard our duplicate mapping.
	 */
	new_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
	_close(fd);
	if (atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
	    (uintptr_t)old_map, (uintptr_t)new_map) == 0 &&
	    new_map != MAP_FAILED)
		munmap((void *)new_map, PAGE_SIZE);
}

#ifdef WANT_HYPERV

#define	HYPERV_REFTSC_DEVPATH	"/dev/" HYPERV_REFTSC_DEVNAME

/*
 * NOTE:
 * We use 'NULL' for this variable to indicate that initialization
 * is required.  And if this variable is 'MAP_FAILED', then Hyper-V
 * reference TSC can not be used, e.g. in misconfigured jail.
 */
static struct hyperv_reftsc *hyperv_ref_tsc;

/*
 * Map the Hyper-V reference TSC page read-only; on failure mark
 * hyperv_ref_tsc as MAP_FAILED so callers do not retry.
 */
static void
__vdso_init_hyperv_tsc(void)
{
	int fd;
	unsigned int mode;

	/* As with HPET above, avoid an absolute-path open in capability mode. */
	if (cap_getmode(&mode) == 0 && mode != 0)
		goto fail;

	fd = _open(HYPERV_REFTSC_DEVPATH, O_RDONLY);
	if (fd < 0)
		goto fail;
	hyperv_ref_tsc = mmap(NULL, sizeof(*hyperv_ref_tsc), PROT_READ,
	    MAP_SHARED, fd, 0);
	_close(fd);

	return;
fail:
	/* Prevent the caller from re-entering. */
	hyperv_ref_tsc = MAP_FAILED;
}

/*
 * Compute the counter value from the Hyper-V reference TSC page using
 * a seqcount-style loop: re-read if tsc_seq changed underneath us.
 * A tsc_seq of 0 means the page is invalid; return ENOSYS then.
 */
static int
__vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc)
{
	uint64_t disc, ret, tsc, scale;
	uint32_t seq;
	int64_t ofs;

	while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {
		scale = tsc_ref->tsc_scale;
		ofs = tsc_ref->tsc_ofs;

		mfence();	/* XXXKIB */
		tsc = rdtsc();

		/* ret = ((tsc * scale) >> 64) + ofs */
		__asm__ __volatile__ ("mulq %3" :
		    "=d" (ret), "=a" (disc) :
		    "a" (tsc), "r" (scale));
		ret += ofs;

		atomic_thread_fence_acq();
		if (tsc_ref->tsc_seq == seq) {
			*tc = ret;
			return (0);
		}

		/* Sequence changed; re-sync. */
	}
	return (ENOSYS);
}

#endif	/* WANT_HYPERV */

/*
 * Return the current value of the timecounter described by th in *tc,
 * dispatching on the algorithm the kernel advertised.  Returns 0 on
 * success or ENOSYS when the counter cannot be read from userspace
 * (unknown algorithm, unmapped HPET, invalid Hyper-V page), in which
 * case the caller falls back to a real system call.
 */
#pragma weak __vdso_gettc
int
__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
{
	volatile char *map;
	uint32_t idx;

	switch (th->th_algo) {
	case VDSO_TH_ALGO_X86_TSC:
		*tc = th->th_x86_shift > 0 ? __vdso_gettc_rdtsc_low(th) :
		    __vdso_gettc_rdtsc32();
		return (0);
	case VDSO_TH_ALGO_X86_HPET:
		idx = th->th_x86_hpet_idx;
		if (idx >= HPET_DEV_MAP_MAX)
			return (ENOSYS);
		map = (volatile char *)atomic_load_acq_ptr(
		    (volatile uintptr_t *)&hpet_dev_map[idx]);
		if (map == NULL) {
			/* First use of this unit; try to map it once. */
			__vdso_init_hpet(idx);
			map = (volatile char *)atomic_load_acq_ptr(
			    (volatile uintptr_t *)&hpet_dev_map[idx]);
		}
		if (map == MAP_FAILED)
			return (ENOSYS);
		*tc = *(volatile uint32_t *)(map + HPET_MAIN_COUNTER);
		return (0);
#ifdef WANT_HYPERV
	case VDSO_TH_ALGO_X86_HVTSC:
		if (hyperv_ref_tsc == NULL)
			__vdso_init_hyperv_tsc();
		if (hyperv_ref_tsc == MAP_FAILED)
			return (ENOSYS);
		return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc));
#endif
	default:
		return (ENOSYS);
	}
}

/*
 * Fetch the pointer to the shared kernel timekeep page from the
 * AT_TIMEKEEP ELF auxiliary vector entry.
 */
#pragma weak __vdso_gettimekeep
int
__vdso_gettimekeep(struct vdso_timekeep **tk)
{

	return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));
}