1/*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2014 Bryan Venteicher <bryanv@FreeBSD.org> 5 * Copyright (c) 2021 Mathieu Chouquet-Stringer 6 * Copyright (c) 2021 Juniper Networks, Inc. 7 * Copyright (c) 2021 Klara, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31/* 32 * Linux KVM paravirtual clock support 33 * 34 * References: 35 * - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html 36 * - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html 37 */ 38 39#include <sys/param.h> 40#include <sys/bus.h> 41#include <sys/domainset.h> 42#include <sys/kernel.h> 43#include <sys/malloc.h> 44#include <sys/module.h> 45#include <sys/smp.h> 46#include <sys/sysctl.h> 47 48#include <vm/vm.h> 49#include <vm/pmap.h> 50#include <vm/vm_extern.h> 51 52#include <machine/pvclock.h> 53#include <x86/kvm.h> 54 55#include "clock_if.h" 56 57#define KVM_CLOCK_DEVNAME "kvmclock" 58/* 59 * Note: Chosen to be (1) above HPET's value (always 950), (2) above the TSC's 60 * default value of 800, and (3) below the TSC's value when it supports the 61 * "Invariant TSC" feature and is believed to be synchronized across all CPUs. 62 */ 63#define KVM_CLOCK_TC_QUALITY 975 64 65struct kvm_clock_softc { 66 struct pvclock pvc; 67 struct pvclock_wall_clock wc; 68 struct pvclock_vcpu_time_info *timeinfos; 69 u_int msr_tc; 70 u_int msr_wc; 71#ifndef EARLY_AP_STARTUP 72 int firstcpu; 73#endif 74}; 75 76static struct pvclock_wall_clock *kvm_clock_get_wallclock(void *arg); 77static void kvm_clock_system_time_enable(struct kvm_clock_softc *sc, 78 const cpuset_t *cpus); 79static void kvm_clock_system_time_enable_pcpu(void *arg); 80static void kvm_clock_setup_sysctl(device_t); 81 82static struct pvclock_wall_clock * 83kvm_clock_get_wallclock(void *arg) 84{ 85 struct kvm_clock_softc *sc = arg; 86 87 wrmsr(sc->msr_wc, vtophys(&sc->wc)); 88 return (&sc->wc); 89} 90 91static void 92kvm_clock_system_time_enable(struct kvm_clock_softc *sc, const cpuset_t *cpus) 93{ 94 smp_rendezvous_cpus(*cpus, NULL, kvm_clock_system_time_enable_pcpu, 95 NULL, sc); 96} 97 98static void 99kvm_clock_system_time_enable_pcpu(void *arg) 100{ 101 struct kvm_clock_softc *sc = arg; 102 103 /* 104 * See [2]; the lsb of this MSR is the system time enable bit. 105 */ 106 wrmsr(sc->msr_tc, vtophys(&(sc->timeinfos)[curcpu]) | 1); 107} 108 109#ifndef EARLY_AP_STARTUP 110static void 111kvm_clock_init_smp(void *arg __unused) 112{ 113 devclass_t kvm_clock_devclass; 114 cpuset_t cpus; 115 struct kvm_clock_softc *sc; 116 117 kvm_clock_devclass = devclass_find(KVM_CLOCK_DEVNAME); 118 sc = devclass_get_softc(kvm_clock_devclass, 0); 119 if (sc == NULL || mp_ncpus == 1) 120 return; 121 122 /* 123 * Register with the hypervisor on all CPUs except the one that 124 * registered in kvm_clock_attach(). 125 */ 126 cpus = all_cpus; 127 KASSERT(CPU_ISSET(sc->firstcpu, &cpus), 128 ("%s: invalid first CPU %d", __func__, sc->firstcpu)); 129 CPU_CLR(sc->firstcpu, &cpus); 130 kvm_clock_system_time_enable(sc, &cpus); 131} 132SYSINIT(kvm_clock, SI_SUB_SMP, SI_ORDER_ANY, kvm_clock_init_smp, NULL); 133#endif 134 135static void 136kvm_clock_identify(driver_t *driver, device_t parent) 137{ 138 u_int regs[4]; 139 140 kvm_cpuid_get_features(regs); 141 if ((regs[0] & 142 (KVM_FEATURE_CLOCKSOURCE2 | KVM_FEATURE_CLOCKSOURCE)) == 0) 143 return; 144 if (device_find_child(parent, KVM_CLOCK_DEVNAME, -1)) 145 return; 146 BUS_ADD_CHILD(parent, 0, KVM_CLOCK_DEVNAME, 0); 147} 148 149static int 150kvm_clock_probe(device_t dev) 151{ 152 device_set_desc(dev, "KVM paravirtual clock"); 153 return (BUS_PROBE_DEFAULT); 154} 155 156static int 157kvm_clock_attach(device_t dev) 158{ 159 u_int regs[4]; 160 struct kvm_clock_softc *sc = device_get_softc(dev); 161 bool stable_flag_supported; 162 163 /* Process KVM "features" CPUID leaf content: */ 164 kvm_cpuid_get_features(regs); 165 if ((regs[0] & KVM_FEATURE_CLOCKSOURCE2) != 0) { 166 sc->msr_tc = KVM_MSR_SYSTEM_TIME_NEW; 167 sc->msr_wc = KVM_MSR_WALL_CLOCK_NEW; 168 } else { 169 KASSERT((regs[0] & KVM_FEATURE_CLOCKSOURCE) != 0, 170 ("Clocksource feature flags disappeared since " 171 "kvm_clock_identify: regs[0] %#0x.", regs[0])); 172 sc->msr_tc = KVM_MSR_SYSTEM_TIME; 173 sc->msr_wc = KVM_MSR_WALL_CLOCK; 174 } 175 stable_flag_supported = 176 (regs[0] & KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) != 0; 177 178 /* Set up 'struct pvclock_vcpu_time_info' page(s): */ 179 sc->timeinfos = kmem_malloc(mp_ncpus * 180 sizeof(struct pvclock_vcpu_time_info), M_WAITOK | M_ZERO); 181#ifdef EARLY_AP_STARTUP 182 kvm_clock_system_time_enable(sc, &all_cpus); 183#else 184 sc->firstcpu = curcpu; 185 kvm_clock_system_time_enable_pcpu(sc); 186#endif 187 188 /* 189 * Init pvclock; register KVM clock wall clock, register KVM clock 190 * timecounter, and set up the requisite infrastructure for vDSO access 191 * to this timecounter. 192 * Regarding 'tc_flags': Since the KVM MSR documentation does not 193 * specifically discuss suspend/resume scenarios, conservatively 194 * leave 'TC_FLAGS_SUSPEND_SAFE' cleared and assume that the system 195 * time must be re-inited in such cases. 196 */ 197 sc->pvc.get_wallclock = kvm_clock_get_wallclock; 198 sc->pvc.get_wallclock_arg = sc; 199 sc->pvc.timeinfos = sc->timeinfos; 200 sc->pvc.stable_flag_supported = stable_flag_supported; 201 pvclock_init(&sc->pvc, dev, KVM_CLOCK_DEVNAME, KVM_CLOCK_TC_QUALITY, 0); 202 kvm_clock_setup_sysctl(dev); 203 return (0); 204} 205 206static int 207kvm_clock_detach(device_t dev) 208{ 209 struct kvm_clock_softc *sc = device_get_softc(dev); 210 211 return (pvclock_destroy(&sc->pvc)); 212} 213 214static int 215kvm_clock_suspend(device_t dev) 216{ 217 return (0); 218} 219 220static int 221kvm_clock_resume(device_t dev) 222{ 223 /* 224 * See note in 'kvm_clock_attach()' regarding 'TC_FLAGS_SUSPEND_SAFE'; 225 * conservatively assume that the system time must be re-inited in 226 * suspend/resume scenarios. 227 */ 228 kvm_clock_system_time_enable(device_get_softc(dev), &all_cpus); 229 pvclock_resume(); 230 inittodr(time_second); 231 return (0); 232} 233 234static int 235kvm_clock_gettime(device_t dev, struct timespec *ts) 236{ 237 struct kvm_clock_softc *sc = device_get_softc(dev); 238 239 pvclock_gettime(&sc->pvc, ts); 240 return (0); 241} 242 243static int 244kvm_clock_settime(device_t dev, struct timespec *ts) 245{ 246 /* 247 * Even though it is not possible to set the KVM clock's wall clock, to 248 * avoid the possibility of periodic benign error messages from 249 * 'settime_task_func()', report success rather than, e.g., 'ENODEV'. 250 */ 251 return (0); 252} 253 254static int 255kvm_clock_tsc_freq_sysctl(SYSCTL_HANDLER_ARGS) 256{ 257 struct kvm_clock_softc *sc = oidp->oid_arg1; 258 uint64_t freq = pvclock_tsc_freq(sc->timeinfos); 259 260 return (sysctl_handle_64(oidp, &freq, 0, req)); 261} 262 263static void 264kvm_clock_setup_sysctl(device_t dev) 265{ 266 struct kvm_clock_softc *sc = device_get_softc(dev); 267 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); 268 struct sysctl_oid *tree = device_get_sysctl_tree(dev); 269 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); 270 271 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tsc_freq", 272 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 273 kvm_clock_tsc_freq_sysctl, "QU", 274 "Time Stamp Counter frequency"); 275} 276 277static device_method_t kvm_clock_methods[] = { 278 DEVMETHOD(device_identify, kvm_clock_identify), 279 DEVMETHOD(device_probe, kvm_clock_probe), 280 DEVMETHOD(device_attach, kvm_clock_attach), 281 DEVMETHOD(device_detach, kvm_clock_detach), 282 DEVMETHOD(device_suspend, kvm_clock_suspend), 283 DEVMETHOD(device_resume, kvm_clock_resume), 284 /* clock interface */ 285 DEVMETHOD(clock_gettime, kvm_clock_gettime), 286 DEVMETHOD(clock_settime, kvm_clock_settime), 287 288 DEVMETHOD_END 289}; 290 291static driver_t kvm_clock_driver = { 292 KVM_CLOCK_DEVNAME, 293 kvm_clock_methods, 294 sizeof(struct kvm_clock_softc), 295}; 296 297DRIVER_MODULE(kvm_clock, nexus, kvm_clock_driver, 0, 0); 298