pvclock.c revision 1.9
1/* $OpenBSD: pvclock.c,v 1.9 2023/02/04 19:19:37 cheloha Exp $ */ 2 3/* 4 * Copyright (c) 2018 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#if !defined(__i386__) && !defined(__amd64__) 20#error pvclock(4) is only supported on i386 and amd64 21#endif 22 23#include <sys/param.h> 24#include <sys/systm.h> 25#include <sys/kernel.h> 26#include <sys/timetc.h> 27#include <sys/timeout.h> 28#include <sys/malloc.h> 29#include <sys/atomic.h> 30 31#include <machine/cpu.h> 32#include <machine/atomic.h> 33#include <uvm/uvm_extern.h> 34 35#include <dev/pv/pvvar.h> 36#include <dev/pv/pvreg.h> 37 38uint pvclock_lastcount; 39 40struct pvclock_softc { 41 struct device sc_dev; 42 void *sc_time; 43 paddr_t sc_paddr; 44 struct timecounter *sc_tc; 45}; 46 47#define DEVNAME(_s) ((_s)->sc_dev.dv_xname) 48 49int pvclock_match(struct device *, void *, void *); 50void pvclock_attach(struct device *, struct device *, void *); 51int pvclock_activate(struct device *, int); 52 53uint pvclock_get_timecount(struct timecounter *); 54void pvclock_read_time_info(struct pvclock_softc *, 55 struct pvclock_time_info *); 56 57static inline uint32_t 58 pvclock_read_begin(const struct pvclock_time_info *); 59static inline int 60 pvclock_read_done(const struct pvclock_time_info *, uint32_t); 61 62const struct cfattach pvclock_ca = { 63 sizeof(struct pvclock_softc), 64 pvclock_match, 65 pvclock_attach, 66 NULL, 67 pvclock_activate 68}; 69 70struct cfdriver pvclock_cd = { 71 NULL, 72 "pvclock", 73 DV_DULL 74}; 75 76struct timecounter pvclock_timecounter = { 77 .tc_get_timecount = pvclock_get_timecount, 78 .tc_counter_mask = ~0u, 79 .tc_frequency = 0, 80 .tc_name = NULL, 81 .tc_quality = -2000, 82 .tc_priv = NULL, 83 .tc_user = 0, 84}; 85 86int 87pvclock_match(struct device *parent, void *match, void *aux) 88{ 89 struct pv_attach_args *pva = aux; 90 struct pvbus_hv *hv; 91 92 /* 93 * pvclock is provided by different hypervisors, we currently 94 * only support the "kvmclock". 95 */ 96 hv = &pva->pva_hv[PVBUS_KVM]; 97 if (hv->hv_base == 0) 98 hv = &pva->pva_hv[PVBUS_OPENBSD]; 99 if (hv->hv_base != 0) { 100 /* 101 * We only implement support for the 2nd version of pvclock. 102 * The first version is basically the same but with different 103 * non-standard MSRs and it is deprecated. 104 */ 105 if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0) 106 return (0); 107 108 /* 109 * Only the "stable" clock with a sync'ed TSC is supported. 110 * In this case the host guarantees that the TSC is constant 111 * and invariant, either by the underlying TSC or by passing 112 * on a synchronized value. 113 */ 114 if ((hv->hv_features & 115 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0) 116 return (0); 117 118 return (1); 119 } 120 121 return (0); 122} 123 124void 125pvclock_attach(struct device *parent, struct device *self, void *aux) 126{ 127 struct pvclock_softc *sc = (struct pvclock_softc *)self; 128 struct pvclock_time_info *ti; 129 paddr_t pa; 130 uint32_t version; 131 uint8_t flags; 132 133 if ((sc->sc_time = km_alloc(PAGE_SIZE, 134 &kv_any, &kp_zero, &kd_nowait)) == NULL) { 135 printf(": time page allocation failed\n"); 136 return; 137 } 138 if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) { 139 printf(": time page PA extraction failed\n"); 140 km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero); 141 return; 142 } 143 144 wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE); 145 sc->sc_paddr = pa; 146 147 ti = sc->sc_time; 148 do { 149 version = pvclock_read_begin(ti); 150 flags = ti->ti_flags; 151 } while (!pvclock_read_done(ti, version)); 152 153 sc->sc_tc = &pvclock_timecounter; 154 sc->sc_tc->tc_name = DEVNAME(sc); 155 sc->sc_tc->tc_frequency = 1000000000ULL; 156 sc->sc_tc->tc_priv = sc; 157 158 pvclock_lastcount = 0; 159 160 /* Better than HPET but below TSC */ 161 sc->sc_tc->tc_quality = 1500; 162 163 if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) { 164 /* if tsc is not stable, set a lower priority */ 165 /* Better than i8254 but below HPET */ 166 sc->sc_tc->tc_quality = 500; 167 } 168 169 tc_init(sc->sc_tc); 170 171 printf("\n"); 172} 173 174int 175pvclock_activate(struct device *self, int act) 176{ 177 struct pvclock_softc *sc = (struct pvclock_softc *)self; 178 int rv = 0; 179 paddr_t pa = sc->sc_paddr; 180 181 switch (act) { 182 case DVACT_POWERDOWN: 183 wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE); 184 break; 185 case DVACT_RESUME: 186 wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE); 187 break; 188 } 189 190 return (rv); 191} 192 193static inline uint32_t 194pvclock_read_begin(const struct pvclock_time_info *ti) 195{ 196 uint32_t version = ti->ti_version & ~0x1; 197 virtio_membar_sync(); 198 return (version); 199} 200 201static inline int 202pvclock_read_done(const struct pvclock_time_info *ti, 203 uint32_t version) 204{ 205 virtio_membar_sync(); 206 return (ti->ti_version == version); 207} 208 209uint 210pvclock_get_timecount(struct timecounter *tc) 211{ 212 struct pvclock_softc *sc = tc->tc_priv; 213 struct pvclock_time_info *ti; 214 uint64_t tsc_timestamp, system_time, delta, ctr; 215 uint32_t version, mul_frac; 216 int8_t shift; 217 uint8_t flags; 218 219 ti = sc->sc_time; 220 do { 221 version = pvclock_read_begin(ti); 222 system_time = ti->ti_system_time; 223 tsc_timestamp = ti->ti_tsc_timestamp; 224 mul_frac = ti->ti_tsc_to_system_mul; 225 shift = ti->ti_tsc_shift; 226 flags = ti->ti_flags; 227 } while (!pvclock_read_done(ti, version)); 228 229 /* 230 * The algorithm is described in 231 * linux/Documentation/virtual/kvm/msr.txt 232 */ 233 delta = rdtsc() - tsc_timestamp; 234 if (shift < 0) 235 delta >>= -shift; 236 else 237 delta <<= shift; 238 ctr = ((delta * mul_frac) >> 32) + system_time; 239 240 if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0) 241 return (ctr); 242 243 if (ctr < pvclock_lastcount) 244 return (pvclock_lastcount); 245 246 atomic_swap_uint(&pvclock_lastcount, ctr); 247 248 return (ctr); 249} 250